1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
2
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F,X86-AVX512F
3
3
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F,X64-AVX512F
4
- ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW,X86-AVX512BW
5
- ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW,X64-AVX512BW
4
+ ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
5
+ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
6
6
7
7
define <16 x i32 > @shuffle_v8i64 (<16 x i32 > %t0 , <16 x i32 > %t1 ) {
8
- ; CHECK-LABEL: shuffle_v8i64:
9
- ; CHECK: # %bb.0: # %entry
10
- ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2
11
- ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
12
- ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm2[0,1],zmm0[2,3],zmm2[4,5],zmm0[6,7],zmm2[8,9],zmm0[10,11],zmm2[12,13],zmm0[14,15]
13
- ; CHECK-NEXT: ret{{[l|q]}}
8
+ ; AVX512F-LABEL: shuffle_v8i64:
9
+ ; AVX512F: # %bb.0: # %entry
10
+ ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm2
11
+ ; AVX512F-NEXT: vpsubd %zmm1, %zmm0, %zmm0
12
+ ; AVX512F-NEXT: movb $-86, %al
13
+ ; AVX512F-NEXT: kmovw %eax, %k1
14
+ ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
15
+ ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
16
+ ; AVX512F-NEXT: ret{{[l|q]}}
17
+ ;
18
+ ; AVX512BW-LABEL: shuffle_v8i64:
19
+ ; AVX512BW: # %bb.0: # %entry
20
+ ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm2
21
+ ; AVX512BW-NEXT: vpsubd %zmm1, %zmm0, %zmm0
22
+ ; AVX512BW-NEXT: movb $-86, %al
23
+ ; AVX512BW-NEXT: kmovd %eax, %k1
24
+ ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
25
+ ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
26
+ ; AVX512BW-NEXT: ret{{[l|q]}}
14
27
entry:
15
28
%t2 = add nsw <16 x i32 > %t0 , %t1
16
29
%t3 = sub nsw <16 x i32 > %t0 , %t1
@@ -83,24 +96,15 @@ define <64 x i8> @addb_selectw_64xi8(<64 x i8> %t0, <64 x i8> %t1) {
83
96
; X64-AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
84
97
; X64-AVX512F-NEXT: retq
85
98
;
86
- ; X86-AVX512BW-LABEL: addb_selectw_64xi8:
87
- ; X86-AVX512BW: # %bb.0:
88
- ; X86-AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm2
89
- ; X86-AVX512BW-NEXT: movl $3, %eax
90
- ; X86-AVX512BW-NEXT: kmovd %eax, %k0
91
- ; X86-AVX512BW-NEXT: kmovd %k0, %k1
92
- ; X86-AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
93
- ; X86-AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
94
- ; X86-AVX512BW-NEXT: retl
95
- ;
96
- ; X64-AVX512BW-LABEL: addb_selectw_64xi8:
97
- ; X64-AVX512BW: # %bb.0:
98
- ; X64-AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm2
99
- ; X64-AVX512BW-NEXT: movl $3, %eax
100
- ; X64-AVX512BW-NEXT: kmovq %rax, %k1
101
- ; X64-AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
102
- ; X64-AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
103
- ; X64-AVX512BW-NEXT: retq
99
+ ; AVX512BW-LABEL: addb_selectw_64xi8:
100
+ ; AVX512BW: # %bb.0:
101
+ ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm2
102
+ ; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
103
+ ; AVX512BW-NEXT: movl $1, %eax
104
+ ; AVX512BW-NEXT: kmovd %eax, %k1
105
+ ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1}
106
+ ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
107
+ ; AVX512BW-NEXT: ret{{[l|q]}}
104
108
%t2 = add nsw <64 x i8 > %t0 , %t1
105
109
%t3 = sub nsw <64 x i8 > %t0 , %t1
106
110
%t4 = shufflevector <64 x i8 > %t2 , <64 x i8 > %t3 , <64 x i32 > <i32 64 , i32 65 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 , i32 32 , i32 33 , i32 34 , i32 35 , i32 36 , i32 37 , i32 38 , i32 39 , i32 40 , i32 41 , i32 42 , i32 43 , i32 44 , i32 45 , i32 46 , i32 47 , i32 48 , i32 49 , i32 50 , i32 51 , i32 52 , i32 53 , i32 54 , i32 55 , i32 56 , i32 57 , i32 58 , i32 59 , i32 60 , i32 61 , i32 62 , i32 63 >
@@ -165,9 +169,10 @@ define <32 x i16> @addw_selectd_32xi16(<32 x i16> %t0, <32 x i16> %t1) {
165
169
; AVX512BW-LABEL: addw_selectd_32xi16:
166
170
; AVX512BW: # %bb.0:
167
171
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm2
168
- ; AVX512BW-NEXT: movl $3, %eax
172
+ ; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm0
173
+ ; AVX512BW-NEXT: movw $1, %ax
169
174
; AVX512BW-NEXT: kmovd %eax, %k1
170
- ; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1}
175
+ ; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1}
171
176
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
172
177
; AVX512BW-NEXT: ret{{[l|q]}}
173
178
%t2 = add nsw <32 x i16 > %t0 , %t1
@@ -193,18 +198,20 @@ define <16 x i32> @addd_selectq_16xi32(<16 x i32> %t0, <16 x i32> %t1) {
193
198
; AVX512F-LABEL: addd_selectq_16xi32:
194
199
; AVX512F: # %bb.0:
195
200
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm2
196
- ; AVX512F-NEXT: movw $3, %ax
201
+ ; AVX512F-NEXT: vpsubd %zmm1, %zmm0, %zmm0
202
+ ; AVX512F-NEXT: movb $1, %al
197
203
; AVX512F-NEXT: kmovw %eax, %k1
198
- ; AVX512F-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
204
+ ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
199
205
; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
200
206
; AVX512F-NEXT: ret{{[l|q]}}
201
207
;
202
208
; AVX512BW-LABEL: addd_selectq_16xi32:
203
209
; AVX512BW: # %bb.0:
204
210
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm2
205
- ; AVX512BW-NEXT: movw $3, %ax
211
+ ; AVX512BW-NEXT: vpsubd %zmm1, %zmm0, %zmm0
212
+ ; AVX512BW-NEXT: movb $1, %al
206
213
; AVX512BW-NEXT: kmovd %eax, %k1
207
- ; AVX512BW-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
214
+ ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
208
215
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
209
216
; AVX512BW-NEXT: ret{{[l|q]}}
210
217
%t2 = add nsw <16 x i32 > %t0 , %t1
0 commit comments