Skip to content

Commit 1d4c3dd

Browse files
authored
Convert the last avx512f and avx512vpclmulqdq intrinsics (#1068)
1 parent 594ff85 commit 1d4c3dd

File tree

4 files changed

+104 -116 lines changed

crates/core_arch/src/x86/avx512f.rs

+50 -50
Original file line number | Diff line number | Diff line change
@@ -21896,57 +21896,57 @@ pub unsafe fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m2
2189621896
transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
2189721897
}
2189821898

21899-
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the result in dst.
21899+
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
2190021900
///
2190121901
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extracti64x4_epi64&expand=2473)
2190221902
#[inline]
2190321903
#[target_feature(enable = "avx512f")]
2190421904
#[cfg_attr(
2190521905
all(test, not(target_os = "windows")),
21906-
assert_instr(vextractf64x4, imm8 = 1) //should be vextracti64x4
21906+
assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
2190721907
)]
21908-
#[rustc_args_required_const(1)]
21909-
pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i, imm8: i32) -> __m256i {
21910-
assert!(imm8 >= 0 && imm8 <= 1);
21911-
match imm8 & 0x1 {
21908+
#[rustc_legacy_const_generics(1)]
21909+
pub unsafe fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
21910+
static_assert_imm1!(IMM1);
21911+
match IMM1 {
2191221912
0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
2191321913
_ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
2191421914
}
2191521915
}
2191621916

21917-
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21917+
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2191821918
///
2191921919
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extracti64x4_epi64&expand=2474)
2192021920
#[inline]
2192121921
#[target_feature(enable = "avx512f")]
2192221922
#[cfg_attr(
2192321923
all(test, not(target_os = "windows")),
21924-
assert_instr(vextracti64x4, IMM8 = 1)
21924+
assert_instr(vextracti64x4, IMM1 = 1)
2192521925
)]
2192621926
#[rustc_legacy_const_generics(3)]
21927-
pub unsafe fn _mm512_mask_extracti64x4_epi64<const IMM8: i32>(
21927+
pub unsafe fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
2192821928
src: __m256i,
2192921929
k: __mmask8,
2193021930
a: __m512i,
2193121931
) -> __m256i {
21932-
static_assert_imm1!(IMM8);
21933-
let r = _mm512_extracti64x4_epi64(a, IMM8);
21932+
static_assert_imm1!(IMM1);
21933+
let r = _mm512_extracti64x4_epi64::<IMM1>(a);
2193421934
transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
2193521935
}
2193621936

21937-
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21937+
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2193821938
///
2193921939
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
2194021940
#[inline]
2194121941
#[target_feature(enable = "avx512f")]
2194221942
#[cfg_attr(
2194321943
all(test, not(target_os = "windows")),
21944-
assert_instr(vextracti64x4, IMM8 = 1)
21944+
assert_instr(vextracti64x4, IMM1 = 1)
2194521945
)]
2194621946
#[rustc_legacy_const_generics(2)]
21947-
pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
21948-
static_assert_imm1!(IMM8);
21949-
let r = _mm512_extracti64x4_epi64(a, IMM8);
21947+
pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
21948+
static_assert_imm1!(IMM1);
21949+
let r = _mm512_extracti64x4_epi64::<IMM1>(a);
2195021950
let zero = _mm256_setzero_si256().as_i64x4();
2195121951
transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
2195221952
}
@@ -22006,21 +22006,21 @@ pub unsafe fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m5
2200622006
transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
2200722007
}
2200822008

22009-
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the result in dst.
22009+
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
2201022010
///
2201122011
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extracti32x4_epi32&expand=2461)
2201222012
#[inline]
2201322013
#[target_feature(enable = "avx512f")]
2201422014
#[cfg_attr(
2201522015
all(test, not(target_os = "windows")),
22016-
assert_instr(vextractf32x4, imm8 = 3) //should be vextracti32x4
22016+
assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
2201722017
)]
22018-
#[rustc_args_required_const(1)]
22019-
pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i {
22020-
assert!(imm8 >= 0 && imm8 <= 3);
22018+
#[rustc_legacy_const_generics(1)]
22019+
pub unsafe fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
22020+
static_assert_imm2!(IMM2);
2202122021
let a = a.as_i32x16();
2202222022
let undefined = _mm512_undefined_epi32().as_i32x16();
22023-
let extract: i32x4 = match imm8 & 0x3 {
22023+
let extract: i32x4 = match IMM2 {
2202422024
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
2202522025
1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
2202622026
2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
@@ -22029,97 +22029,97 @@ pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i {
2202922029
transmute(extract)
2203022030
}
2203122031

22032-
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22032+
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2203322033
///
2203422034
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extracti32x4_epi32&expand=2462)
2203522035
#[inline]
2203622036
#[target_feature(enable = "avx512f")]
2203722037
#[cfg_attr(
2203822038
all(test, not(target_os = "windows")),
22039-
assert_instr(vextracti32x4, IMM8 = 3)
22039+
assert_instr(vextracti32x4, IMM2 = 3)
2204022040
)]
2204122041
#[rustc_legacy_const_generics(3)]
22042-
pub unsafe fn _mm512_mask_extracti32x4_epi32<const IMM8: i32>(
22042+
pub unsafe fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
2204322043
src: __m128i,
2204422044
k: __mmask8,
2204522045
a: __m512i,
2204622046
) -> __m128i {
22047-
static_assert_imm2!(IMM8);
22048-
let r = _mm512_extracti32x4_epi32(a, IMM8);
22047+
static_assert_imm2!(IMM2);
22048+
let r = _mm512_extracti32x4_epi32::<IMM2>(a);
2204922049
transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
2205022050
}
2205122051

22052-
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22052+
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2205322053
///
2205422054
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
2205522055
#[inline]
2205622056
#[target_feature(enable = "avx512f")]
2205722057
#[cfg_attr(
2205822058
all(test, not(target_os = "windows")),
22059-
assert_instr(vextracti32x4, IMM8 = 3)
22059+
assert_instr(vextracti32x4, IMM2 = 3)
2206022060
)]
2206122061
#[rustc_legacy_const_generics(2)]
22062-
pub unsafe fn _mm512_maskz_extracti32x4_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
22063-
static_assert_imm2!(IMM8);
22064-
let r = _mm512_extracti32x4_epi32(a, IMM8);
22062+
pub unsafe fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
22063+
static_assert_imm2!(IMM2);
22064+
let r = _mm512_extracti32x4_epi32::<IMM2>(a);
2206522065
let zero = _mm_setzero_si128().as_i32x4();
2206622066
transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
2206722067
}
2206822068

22069-
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the result in dst.
22069+
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
2207022070
///
2207122071
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti32x4_epi32&expand=2458)
2207222072
#[inline]
2207322073
#[target_feature(enable = "avx512f,avx512vl")]
2207422074
#[cfg_attr(
2207522075
all(test, not(target_os = "windows")),
22076-
assert_instr(vextract, imm8 = 1) //should be vextracti32x4
22076+
assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
2207722077
)]
22078-
#[rustc_args_required_const(1)]
22079-
pub unsafe fn _mm256_extracti32x4_epi32(a: __m256i, imm8: i32) -> __m128i {
22080-
assert!(imm8 >= 0 && imm8 <= 1);
22078+
#[rustc_legacy_const_generics(1)]
22079+
pub unsafe fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
22080+
static_assert_imm1!(IMM1);
2208122081
let a = a.as_i32x8();
2208222082
let undefined = _mm256_undefined_si256().as_i32x8();
22083-
let extract: i32x4 = match imm8 & 0x1 {
22083+
let extract: i32x4 = match IMM1 {
2208422084
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
2208522085
_ => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
2208622086
};
2208722087
transmute(extract)
2208822088
}
2208922089

22090-
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22090+
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2209122091
///
2209222092
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_extracti32x4_epi32&expand=2459)
2209322093
#[inline]
2209422094
#[target_feature(enable = "avx512f,avx512vl")]
2209522095
#[cfg_attr(
2209622096
all(test, not(target_os = "windows")),
22097-
assert_instr(vextracti32x4, IMM8 = 1)
22097+
assert_instr(vextracti32x4, IMM1 = 1)
2209822098
)]
2209922099
#[rustc_legacy_const_generics(3)]
22100-
pub unsafe fn _mm256_mask_extracti32x4_epi32<const IMM8: i32>(
22100+
pub unsafe fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
2210122101
src: __m128i,
2210222102
k: __mmask8,
2210322103
a: __m256i,
2210422104
) -> __m128i {
22105-
static_assert_imm1!(IMM8);
22106-
let r = _mm256_extracti32x4_epi32(a, IMM8);
22105+
static_assert_imm1!(IMM1);
22106+
let r = _mm256_extracti32x4_epi32::<IMM1>(a);
2210722107
transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
2210822108
}
2210922109

22110-
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22110+
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2211122111
///
2211222112
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
2211322113
#[inline]
2211422114
#[target_feature(enable = "avx512f,avx512vl")]
2211522115
#[cfg_attr(
2211622116
all(test, not(target_os = "windows")),
22117-
assert_instr(vextracti32x4, IMM8 = 1)
22117+
assert_instr(vextracti32x4, IMM1 = 1)
2211822118
)]
2211922119
#[rustc_legacy_const_generics(2)]
22120-
pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
22121-
static_assert_imm1!(IMM8);
22122-
let r = _mm256_extracti32x4_epi32(a, IMM8);
22120+
pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
22121+
static_assert_imm1!(IMM1);
22122+
let r = _mm256_extracti32x4_epi32::<IMM1>(a);
2212322123
let zero = _mm_setzero_si128().as_i32x4();
2212422124
transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
2212522125
}
@@ -46698,7 +46698,7 @@ mod tests {
4669846698
#[simd_test(enable = "avx512f")]
4669946699
unsafe fn test_mm512_extracti32x4_epi32() {
4670046700
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
46701-
let r = _mm512_extracti32x4_epi32(a, 0b1);
46701+
let r = _mm512_extracti32x4_epi32::<1>(a);
4670246702
let e = _mm_setr_epi32(5, 6, 7, 8);
4670346703
assert_eq_m128i(r, e);
4670446704
}
@@ -46727,7 +46727,7 @@ mod tests {
4672746727
#[simd_test(enable = "avx512f,avx512vl")]
4672846728
unsafe fn test_mm256_extracti32x4_epi32() {
4672946729
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46730-
let r = _mm256_extracti32x4_epi32(a, 0b1);
46730+
let r = _mm256_extracti32x4_epi32::<1>(a);
4673146731
let e = _mm_set_epi32(1, 2, 3, 4);
4673246732
assert_eq_m128i(r, e);
4673346733
}

crates/core_arch/src/x86/avx512vaes.rs

+12 -12
Original file line number | Diff line number | Diff line change
@@ -262,24 +262,24 @@ mod tests {
262262
0x19BE9F660038BDB5,
263263
);
264264
let mut a_decomp = [_mm_setzero_si128(); 4];
265-
a_decomp[0] = _mm512_extracti32x4_epi32(a, 0);
266-
a_decomp[1] = _mm512_extracti32x4_epi32(a, 1);
267-
a_decomp[2] = _mm512_extracti32x4_epi32(a, 2);
268-
a_decomp[3] = _mm512_extracti32x4_epi32(a, 3);
265+
a_decomp[0] = _mm512_extracti32x4_epi32::<0>(a);
266+
a_decomp[1] = _mm512_extracti32x4_epi32::<1>(a);
267+
a_decomp[2] = _mm512_extracti32x4_epi32::<2>(a);
268+
a_decomp[3] = _mm512_extracti32x4_epi32::<3>(a);
269269
let mut k_decomp = [_mm_setzero_si128(); 4];
270-
k_decomp[0] = _mm512_extracti32x4_epi32(k, 0);
271-
k_decomp[1] = _mm512_extracti32x4_epi32(k, 1);
272-
k_decomp[2] = _mm512_extracti32x4_epi32(k, 2);
273-
k_decomp[3] = _mm512_extracti32x4_epi32(k, 3);
270+
k_decomp[0] = _mm512_extracti32x4_epi32::<0>(k);
271+
k_decomp[1] = _mm512_extracti32x4_epi32::<1>(k);
272+
k_decomp[2] = _mm512_extracti32x4_epi32::<2>(k);
273+
k_decomp[3] = _mm512_extracti32x4_epi32::<3>(k);
274274
let r = vectorized(a, k);
275275
let mut e_decomp = [_mm_setzero_si128(); 4];
276276
for i in 0..4 {
277277
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
278278
}
279-
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 0), e_decomp[0]);
280-
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 1), e_decomp[1]);
281-
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 2), e_decomp[2]);
282-
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 3), e_decomp[3]);
279+
assert_eq_m128i(_mm512_extracti32x4_epi32::<0>(r), e_decomp[0]);
280+
assert_eq_m128i(_mm512_extracti32x4_epi32::<1>(r), e_decomp[1]);
281+
assert_eq_m128i(_mm512_extracti32x4_epi32::<2>(r), e_decomp[2]);
282+
assert_eq_m128i(_mm512_extracti32x4_epi32::<3>(r), e_decomp[3]);
283283
}
284284

285285
#[simd_test(enable = "avx512vaes,avx512f")]

0 commit comments

Comments
 (0)