Skip to content

Commit 83bd696

Browse files
authored
Revert "Merge pull request #1042 from minybot/avx512"
This reverts commit c3960ea.
1 parent c3960ea commit 83bd696

File tree

6 files changed

+3790
-2633
lines changed

6 files changed

+3790
-2633
lines changed

crates/core_arch/src/x86/avx2.rs

+67-18
Original file line number | Diff line number | Diff line change
@@ -2642,25 +2642,74 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2642 2642
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
2643 2643
#[inline]
2644 2644
#[target_feature(enable = "avx2")]
2645-
#[cfg_attr(test, assert_instr(vpermilps, MASK = 9))]
2646-
#[rustc_legacy_const_generics(1)]
2645+
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
2646+
#[rustc_args_required_const(1)]
2647 2647
#[stable(feature = "simd_x86", since = "1.27.0")]
2648-
pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2649-
static_assert_imm8!(MASK);
2650-
let r: i32x8 = simd_shuffle8(
2651-
a.as_i32x8(),
2652-
a.as_i32x8(),
2653-
[
2654-
MASK as u32 & 0b11,
2655-
(MASK as u32 >> 2) & 0b11,
2656-
(MASK as u32 >> 4) & 0b11,
2657-
(MASK as u32 >> 6) & 0b11,
2658-
(MASK as u32 & 0b11) + 4,
2659-
((MASK as u32 >> 2) & 0b11) + 4,
2660-
((MASK as u32 >> 4) & 0b11) + 4,
2661-
((MASK as u32 >> 6) & 0b11) + 4,
2662-
],
2663-
);
2648+
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
2649+
// simd_shuffleX requires that its selector parameter be made up of
2650+
// constant values, but we can't enforce that here. In spirit, we need
2651+
// to write a `match` on all possible values of a byte, and for each value,
2652+
// hard-code the correct `simd_shuffleX` call using only constants. We
2653+
// then hope for LLVM to do the rest.
2654+
//
2655+
// Of course, that's... awful. So we try to use macros to do it for us.
2656+
let imm8 = (imm8 & 0xFF) as u8;
2657+
2658+
let a = a.as_i32x8();
2659+
macro_rules! shuffle_done {
2660+
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
2661+
simd_shuffle8(
2662+
a,
2663+
a,
2664+
[
2665+
$x01,
2666+
$x23,
2667+
$x45,
2668+
$x67,
2669+
4 + $x01,
2670+
4 + $x23,
2671+
4 + $x45,
2672+
4 + $x67,
2673+
],
2674+
)
2675+
};
2676+
}
2677+
macro_rules! shuffle_x67 {
2678+
($x01:expr, $x23:expr, $x45:expr) => {
2679+
match (imm8 >> 6) & 0b11 {
2680+
0b00 => shuffle_done!($x01, $x23, $x45, 0),
2681+
0b01 => shuffle_done!($x01, $x23, $x45, 1),
2682+
0b10 => shuffle_done!($x01, $x23, $x45, 2),
2683+
_ => shuffle_done!($x01, $x23, $x45, 3),
2684+
}
2685+
};
2686+
}
2687+
macro_rules! shuffle_x45 {
2688+
($x01:expr, $x23:expr) => {
2689+
match (imm8 >> 4) & 0b11 {
2690+
0b00 => shuffle_x67!($x01, $x23, 0),
2691+
0b01 => shuffle_x67!($x01, $x23, 1),
2692+
0b10 => shuffle_x67!($x01, $x23, 2),
2693+
_ => shuffle_x67!($x01, $x23, 3),
2694+
}
2695+
};
2696+
}
2697+
macro_rules! shuffle_x23 {
2698+
($x01:expr) => {
2699+
match (imm8 >> 2) & 0b11 {
2700+
0b00 => shuffle_x45!($x01, 0),
2701+
0b01 => shuffle_x45!($x01, 1),
2702+
0b10 => shuffle_x45!($x01, 2),
2703+
_ => shuffle_x45!($x01, 3),
2704+
}
2705+
};
2706+
}
2707+
let r: i32x8 = match imm8 & 0b11 {
2708+
0b00 => shuffle_x23!(0),
2709+
0b01 => shuffle_x23!(1),
2710+
0b10 => shuffle_x23!(2),
2711+
_ => shuffle_x23!(3),
2712+
};
2664 2713
transmute(r)
2665 2714
}
2666 2715

0 commit comments

Comments
 (0)