@@ -9388,6 +9388,96 @@ pub unsafe fn _mm_maskz_alignr_epi8(k: __mmask16, a: __m128i, b: __m128i, imm8:
    transmute(simd_select_bitmask(k, r.as_i8x16(), zero))
}

+ /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
+ #[inline]
+ #[target_feature(enable = "avx512bw")]
+ #[cfg_attr(test, assert_instr(vpmovswb))]
+ pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
+     vpmovswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
+ }
+
+ /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
+ #[inline]
+ #[target_feature(enable = "avx512bw,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpmovswb))]
+ pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
+     vpmovswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
+ }
+
+ /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
+ #[inline]
+ #[target_feature(enable = "avx512bw,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpmovswb))]
+ pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+     vpmovswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
+ }
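A minimal usage sketch for the signed-saturating store intrinsics added above (illustrative only, not part of this patch; the wrapper name `saturating_narrow_demo`, the runtime `is_x86_feature_detected!` check, and the chosen mask are assumptions for the example):

```rust
// Illustrative sketch, not part of this patch: narrow 32 signed 16-bit lanes to
// bytes with signed saturation, writing only the lanes selected by the writemask.
#[cfg(target_arch = "x86_64")]
fn saturating_narrow_demo() {
    use core::arch::x86_64::*;
    if !is_x86_feature_detected!("avx512bw") {
        return; // AVX-512BW not available at runtime
    }
    unsafe {
        let a = _mm512_set1_epi16(300); // 300 does not fit in i8, saturates to 127
        let mut out = [0i8; 32];
        // Low 16 mask bits set: bytes 0..16 are written, bytes 16..32 stay untouched.
        _mm512_mask_cvtsepi16_storeu_epi8(out.as_mut_ptr(), 0x0000_FFFF, a);
        assert!(out[..16].iter().all(|&b| b == i8::MAX));
        assert!(out[16..].iter().all(|&b| b == 0));
    }
}
```

Lanes whose mask bit is clear leave the corresponding destination bytes unchanged, which is why only the low half of the buffer is modified here.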
+
+ /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
+ #[inline]
+ #[target_feature(enable = "avx512bw")]
+ #[cfg_attr(test, assert_instr(vpmovwb))]
+ pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
+     vpmovwbmem(mem_addr as *mut i8, a.as_i16x32(), k);
+ }
+
+ /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
+ #[inline]
+ #[target_feature(enable = "avx512bw,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpmovwb))]
+ pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
+     vpmovwbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
+ }
+
+ /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
+ #[inline]
+ #[target_feature(enable = "avx512bw,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpmovwb))]
+ pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+     vpmovwbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
+ }
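A hedged usage sketch for the truncating variants above (not part of this patch; the wrapper name `truncating_narrow_demo` and the runtime feature checks are assumptions). It shows that truncation keeps only the low 8 bits of each 16-bit lane:

```rust
// Illustrative only: truncating narrow of 16 packed 16-bit values to bytes.
// Requires both AVX-512BW and AVX-512VL for the 256-bit form.
#[cfg(target_arch = "x86_64")]
fn truncating_narrow_demo() {
    use core::arch::x86_64::*;
    if !(is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl")) {
        return;
    }
    unsafe {
        let a = _mm256_set1_epi16(0x0180); // low byte of each lane is 0x80
        let mut out = [0i8; 16];
        // All 16 mask bits set: every destination byte receives the truncated low byte.
        _mm256_mask_cvtepi16_storeu_epi8(out.as_mut_ptr(), 0xFFFF, a);
        assert!(out.iter().all(|&b| b == 0x80u8 as i8));
    }
}
```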
+
+ /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
+ #[inline]
+ #[target_feature(enable = "avx512bw")]
+ #[cfg_attr(test, assert_instr(vpmovuswb))]
+ pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
+     vpmovuswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
+ }
+
+ /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
+ #[inline]
+ #[target_feature(enable = "avx512bw,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpmovuswb))]
+ pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
+     vpmovuswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
+ }
+
+ /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
+ #[inline]
+ #[target_feature(enable = "avx512bw,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpmovuswb))]
+ pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+     vpmovuswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
+ }
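A final hedged sketch for the unsigned-saturating variants above, using a partial writemask (not part of this patch; the wrapper name `unsigned_saturating_narrow_demo` and the feature checks are assumptions):

```rust
// Illustrative only: unsigned-saturating narrow of 8 packed 16-bit values, with a
// partial writemask so unselected destination bytes keep their previous contents.
#[cfg(target_arch = "x86_64")]
fn unsigned_saturating_narrow_demo() {
    use core::arch::x86_64::*;
    if !(is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl")) {
        return;
    }
    unsafe {
        let a = _mm_set1_epi16(500); // 500 > 255, saturates to u8::MAX
        let mut out = [0i8; 8];
        // Mask 0b0000_1111: only bytes 0..4 are written; bytes 4..8 stay 0.
        _mm_mask_cvtusepi16_storeu_epi8(out.as_mut_ptr(), 0b0000_1111, a);
        assert!(out[..4].iter().all(|&b| b == u8::MAX as i8));
        assert!(out[4..].iter().all(|&b| b == 0));
    }
}
```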
+
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
@@ -9594,6 +9684,27 @@ extern "C" {
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;
+
+     #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
+     fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
+     #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
+     fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
+     #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
+     fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
+
+     #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
+     fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
+     #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
+     fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
+     #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
+     fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
+
+     #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
+     fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
+     #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
+     fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
+     #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
+     fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}

#[cfg(test)]
@@ -17905,4 +18016,107 @@ mod tests {
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }
+
+     #[simd_test(enable = "avx512bw")]
+     unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
+         let a = _mm512_set1_epi16(i16::MAX);
+         let mut r = _mm256_undefined_si256();
+         _mm512_mask_cvtsepi16_storeu_epi8(
+             &mut r as *mut _ as *mut i8,
+             0b11111111_11111111_11111111_11111111,
+             a,
+         );
+         let e = _mm256_set1_epi8(i8::MAX);
+         assert_eq_m256i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw,avx512vl")]
+     unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
+         let a = _mm256_set1_epi16(i16::MAX);
+         let mut r = _mm_undefined_si128();
+         _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+         let e = _mm_set1_epi8(i8::MAX);
+         assert_eq_m128i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw,avx512vl")]
+     unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
+         let a = _mm_set1_epi16(i16::MAX);
+         let mut r = _mm_set1_epi8(0);
+         _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+         #[rustfmt::skip]
+         let e = _mm_set_epi8(
+             0, 0, 0, 0, 0, 0, 0, 0,
+             i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+         );
+         assert_eq_m128i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw")]
+     unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
+         let a = _mm512_set1_epi16(8);
+         let mut r = _mm256_undefined_si256();
+         _mm512_mask_cvtepi16_storeu_epi8(
+             &mut r as *mut _ as *mut i8,
+             0b11111111_11111111_11111111_11111111,
+             a,
+         );
+         let e = _mm256_set1_epi8(8);
+         assert_eq_m256i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw,avx512vl")]
+     unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
+         let a = _mm256_set1_epi16(8);
+         let mut r = _mm_undefined_si128();
+         _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+         let e = _mm_set1_epi8(8);
+         assert_eq_m128i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw,avx512vl")]
+     unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
+         let a = _mm_set1_epi16(8);
+         let mut r = _mm_set1_epi8(0);
+         _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
+         assert_eq_m128i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw")]
+     unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
+         let a = _mm512_set1_epi16(i16::MAX);
+         let mut r = _mm256_undefined_si256();
+         _mm512_mask_cvtusepi16_storeu_epi8(
+             &mut r as *mut _ as *mut i8,
+             0b11111111_11111111_11111111_11111111,
+             a,
+         );
+         let e = _mm256_set1_epi8(u8::MAX as i8);
+         assert_eq_m256i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw,avx512vl")]
+     unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
+         let a = _mm256_set1_epi16(i16::MAX);
+         let mut r = _mm_undefined_si128();
+         _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+         let e = _mm_set1_epi8(u8::MAX as i8);
+         assert_eq_m128i(r, e);
+     }
+
+     #[simd_test(enable = "avx512bw,avx512vl")]
+     unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
+         let a = _mm_set1_epi16(i16::MAX);
+         let mut r = _mm_set1_epi8(0);
+         _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+         #[rustfmt::skip]
+         let e = _mm_set_epi8(
+             0, 0, 0, 0,
+             0, 0, 0, 0,
+             u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+             u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+         );
+         assert_eq_m128i(r, e);
+     }
}