@@ -2565,35 +2565,65 @@ pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2565
2565
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256)
2566
2566
#[ inline]
2567
2567
#[ target_feature( enable = "avx2" ) ]
2568
- #[ cfg_attr( test, assert_instr( vpslldq, imm8 = 3 ) ) ]
2569
- #[ rustc_args_required_const ( 1 ) ]
2568
+ #[ cfg_attr( test, assert_instr( vpslldq, IMM8 = 3 ) ) ]
2569
+ #[ rustc_legacy_const_generics ( 1 ) ]
2570
2570
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2571
- pub unsafe fn _mm256_slli_si256 ( a : __m256i , imm8 : i32 ) -> __m256i {
2572
- let a = a. as_i64x4 ( ) ;
2573
- macro_rules! call {
2574
- ( $imm8: expr) => {
2575
- vpslldq( a, $imm8)
2576
- } ;
2577
- }
2578
- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2571
+ pub unsafe fn _mm256_slli_si256 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2572
+ static_assert_imm8 ! ( IMM8 ) ;
2573
+ _mm256_bslli_epi128 :: < IMM8 > ( a)
2579
2574
}
2580
2575
2581
2576
/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2582
2577
///
2583
2578
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128)
2584
2579
#[ inline]
2585
2580
#[ target_feature( enable = "avx2" ) ]
2586
- #[ cfg_attr( test, assert_instr( vpslldq, imm8 = 3 ) ) ]
2587
- #[ rustc_args_required_const ( 1 ) ]
2581
+ #[ cfg_attr( test, assert_instr( vpslldq, IMM8 = 3 ) ) ]
2582
+ #[ rustc_legacy_const_generics ( 1 ) ]
2588
2583
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2589
- pub unsafe fn _mm256_bslli_epi128 ( a : __m256i , imm8 : i32 ) -> __m256i {
2590
- let a = a. as_i64x4 ( ) ;
2591
- macro_rules! call {
2592
- ( $imm8: expr) => {
2593
- vpslldq( a, $imm8)
2594
- } ;
2595
- }
2596
- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2584
+ pub unsafe fn _mm256_bslli_epi128 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2585
+ static_assert_imm8 ! ( IMM8 ) ;
2586
+ let a = a. as_i8x32 ( ) ;
2587
+ let zero = _mm256_setzero_si256 ( ) . as_i8x32 ( ) ;
2588
+ let r: i8x32 = simd_shuffle32 (
2589
+ zero,
2590
+ a,
2591
+ [
2592
+ 32 - ( IMM8 as u32 & 0xff ) ,
2593
+ 33 - ( IMM8 as u32 & 0xff ) ,
2594
+ 34 - ( IMM8 as u32 & 0xff ) ,
2595
+ 35 - ( IMM8 as u32 & 0xff ) ,
2596
+ 36 - ( IMM8 as u32 & 0xff ) ,
2597
+ 37 - ( IMM8 as u32 & 0xff ) ,
2598
+ 38 - ( IMM8 as u32 & 0xff ) ,
2599
+ 39 - ( IMM8 as u32 & 0xff ) ,
2600
+ 40 - ( IMM8 as u32 & 0xff ) ,
2601
+ 41 - ( IMM8 as u32 & 0xff ) ,
2602
+ 42 - ( IMM8 as u32 & 0xff ) ,
2603
+ 43 - ( IMM8 as u32 & 0xff ) ,
2604
+ 44 - ( IMM8 as u32 & 0xff ) ,
2605
+ 45 - ( IMM8 as u32 & 0xff ) ,
2606
+ 46 - ( IMM8 as u32 & 0xff ) ,
2607
+ 47 - ( IMM8 as u32 & 0xff ) ,
2608
+ 48 - ( IMM8 as u32 & 0xff ) - 16 ,
2609
+ 49 - ( IMM8 as u32 & 0xff ) - 16 ,
2610
+ 50 - ( IMM8 as u32 & 0xff ) - 16 ,
2611
+ 51 - ( IMM8 as u32 & 0xff ) - 16 ,
2612
+ 52 - ( IMM8 as u32 & 0xff ) - 16 ,
2613
+ 53 - ( IMM8 as u32 & 0xff ) - 16 ,
2614
+ 54 - ( IMM8 as u32 & 0xff ) - 16 ,
2615
+ 55 - ( IMM8 as u32 & 0xff ) - 16 ,
2616
+ 56 - ( IMM8 as u32 & 0xff ) - 16 ,
2617
+ 57 - ( IMM8 as u32 & 0xff ) - 16 ,
2618
+ 58 - ( IMM8 as u32 & 0xff ) - 16 ,
2619
+ 59 - ( IMM8 as u32 & 0xff ) - 16 ,
2620
+ 60 - ( IMM8 as u32 & 0xff ) - 16 ,
2621
+ 61 - ( IMM8 as u32 & 0xff ) - 16 ,
2622
+ 62 - ( IMM8 as u32 & 0xff ) - 16 ,
2623
+ 63 - ( IMM8 as u32 & 0xff ) - 16 ,
2624
+ ] ,
2625
+ ) ;
2626
+ transmute ( r)
2597
2627
}
2598
2628
2599
2629
/// Shifts packed 32-bit integers in `a` left by the amount
@@ -2729,35 +2759,158 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
2729
2759
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256)
2730
2760
#[ inline]
2731
2761
#[ target_feature( enable = "avx2" ) ]
2732
- #[ cfg_attr( test, assert_instr( vpsrldq, imm8 = 3 ) ) ]
2733
- #[ rustc_args_required_const ( 1 ) ]
2762
+ #[ cfg_attr( test, assert_instr( vpsrldq, IMM8 = 1 ) ) ]
2763
+ #[ rustc_legacy_const_generics ( 1 ) ]
2734
2764
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2735
- pub unsafe fn _mm256_srli_si256 ( a : __m256i , imm8 : i32 ) -> __m256i {
2736
- let a = a. as_i64x4 ( ) ;
2737
- macro_rules! call {
2738
- ( $imm8: expr) => {
2739
- vpsrldq( a, $imm8)
2740
- } ;
2741
- }
2742
- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2765
+ pub unsafe fn _mm256_srli_si256 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2766
+ static_assert_imm8 ! ( IMM8 ) ;
2767
+ _mm256_bsrli_epi128 :: < IMM8 > ( a)
2743
2768
}
2744
2769
2745
2770
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2746
2771
///
2747
2772
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128)
2748
2773
#[ inline]
2749
2774
#[ target_feature( enable = "avx2" ) ]
2750
- #[ cfg_attr( test, assert_instr( vpsrldq, imm8 = 3 ) ) ]
2751
- #[ rustc_args_required_const ( 1 ) ]
2775
+ #[ cfg_attr( test, assert_instr( vpsrldq, IMM8 = 1 ) ) ]
2776
+ #[ rustc_legacy_const_generics ( 1 ) ]
2752
2777
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2753
- pub unsafe fn _mm256_bsrli_epi128 ( a : __m256i , imm8 : i32 ) -> __m256i {
2754
- let a = a. as_i64x4 ( ) ;
2755
- macro_rules! call {
2756
- ( $imm8: expr) => {
2757
- vpsrldq( a, $imm8)
2758
- } ;
2759
- }
2760
- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2778
+ pub unsafe fn _mm256_bsrli_epi128 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2779
+ static_assert_imm8 ! ( IMM8 ) ;
2780
+ let a = a. as_i8x32 ( ) ;
2781
+ let zero = _mm256_setzero_si256 ( ) . as_i8x32 ( ) ;
2782
+ let r: i8x32 = match IMM8 % 16 {
2783
+ 0 => simd_shuffle32 (
2784
+ a,
2785
+ zero,
2786
+ [
2787
+ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 , 20 , 21 , 22 ,
2788
+ 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 ,
2789
+ ] ,
2790
+ ) ,
2791
+ 1 => simd_shuffle32 (
2792
+ a,
2793
+ zero,
2794
+ [
2795
+ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 17 , 18 , 19 , 20 , 21 , 22 , 23 ,
2796
+ 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 ,
2797
+ ] ,
2798
+ ) ,
2799
+ 2 => simd_shuffle32 (
2800
+ a,
2801
+ zero,
2802
+ [
2803
+ 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 18 , 19 , 20 , 21 , 22 , 23 , 24 ,
2804
+ 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 , 32 ,
2805
+ ] ,
2806
+ ) ,
2807
+ 3 => simd_shuffle32 (
2808
+ a,
2809
+ zero,
2810
+ [
2811
+ 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 19 , 20 , 21 , 22 , 23 , 24 ,
2812
+ 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 , 32 , 32 ,
2813
+ ] ,
2814
+ ) ,
2815
+ 4 => simd_shuffle32 (
2816
+ a,
2817
+ zero,
2818
+ [
2819
+ 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 20 , 21 , 22 , 23 , 24 , 25 ,
2820
+ 26 , 27 , 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 ,
2821
+ ] ,
2822
+ ) ,
2823
+ 5 => simd_shuffle32 (
2824
+ a,
2825
+ zero,
2826
+ [
2827
+ 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 21 , 22 , 23 , 24 , 25 , 26 ,
2828
+ 27 , 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 ,
2829
+ ] ,
2830
+ ) ,
2831
+ 6 => simd_shuffle32 (
2832
+ a,
2833
+ zero,
2834
+ [
2835
+ 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 22 , 23 , 24 , 25 , 26 , 27 ,
2836
+ 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 ,
2837
+ ] ,
2838
+ ) ,
2839
+ 7 => simd_shuffle32 (
2840
+ a,
2841
+ zero,
2842
+ [
2843
+ 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 23 , 24 , 25 , 26 , 27 ,
2844
+ 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2845
+ ] ,
2846
+ ) ,
2847
+ 8 => simd_shuffle32 (
2848
+ a,
2849
+ zero,
2850
+ [
2851
+ 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 24 , 25 , 26 , 27 , 28 ,
2852
+ 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2853
+ ] ,
2854
+ ) ,
2855
+ 9 => simd_shuffle32 (
2856
+ a,
2857
+ zero,
2858
+ [
2859
+ 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 25 , 26 , 27 , 28 , 29 ,
2860
+ 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2861
+ ] ,
2862
+ ) ,
2863
+ 10 => simd_shuffle32 (
2864
+ a,
2865
+ zero,
2866
+ [
2867
+ 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 26 , 27 , 28 , 29 , 30 ,
2868
+ 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2869
+ ] ,
2870
+ ) ,
2871
+ 11 => simd_shuffle32 (
2872
+ a,
2873
+ zero,
2874
+ [
2875
+ 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 27 , 28 , 29 , 30 , 31 ,
2876
+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2877
+ ] ,
2878
+ ) ,
2879
+ 12 => simd_shuffle32 (
2880
+ a,
2881
+ zero,
2882
+ [
2883
+ 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 28 , 29 , 30 , 31 , 32 ,
2884
+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2885
+ ] ,
2886
+ ) ,
2887
+ 13 => simd_shuffle32 (
2888
+ a,
2889
+ zero,
2890
+ [
2891
+ 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 29 , 30 , 31 , 32 , 32 ,
2892
+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2893
+ ] ,
2894
+ ) ,
2895
+ 14 => simd_shuffle32 (
2896
+ a,
2897
+ zero,
2898
+ [
2899
+ 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 30 , 31 , 32 , 32 , 32 ,
2900
+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2901
+ ] ,
2902
+ ) ,
2903
+ 15 => simd_shuffle32 (
2904
+ a,
2905
+ zero,
2906
+ [
2907
+ 14 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 31 , 32 , 32 , 32 , 32 ,
2908
+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2909
+ ] ,
2910
+ ) ,
2911
+ _ => zero,
2912
+ } ;
2913
+ transmute ( r)
2761
2914
}
2762
2915
2763
2916
/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
@@ -4824,7 +4977,7 @@ mod tests {
4824
4977
#[ simd_test( enable = "avx2" ) ]
4825
4978
unsafe fn test_mm256_slli_si256 ( ) {
4826
4979
let a = _mm256_set1_epi64x ( 0xFFFFFFFF ) ;
4827
- let r = _mm256_slli_si256 ( a , 3 ) ;
4980
+ let r = _mm256_slli_si256 :: < 3 > ( a ) ;
4828
4981
assert_eq_m256i ( r, _mm256_set1_epi64x ( 0xFFFFFFFF000000 ) ) ;
4829
4982
}
4830
4983
@@ -4923,7 +5076,7 @@ mod tests {
4923
5076
17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 ,
4924
5077
25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 ,
4925
5078
) ;
4926
- let r = _mm256_srli_si256 ( a , 3 ) ;
5079
+ let r = _mm256_srli_si256 :: < 3 > ( a ) ;
4927
5080
#[ rustfmt:: skip]
4928
5081
let e = _mm256_setr_epi8 (
4929
5082
4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 ,
0 commit comments