Skip to content

Commit 3e8d980

Browse files
Keno authored and tstellar committed
[X86] Don't produce bad x86andnp nodes for i1 vectors
In D85499, I attempted to fix this same issue by canonicalizing andnp for i1 vectors, but since there was some opposition to such a change, this commit just fixes the bug by using two different forms depending on which kind of vector type is in use. We can then always decide to switch the canonical forms later. Description of the original bug: We have a DAG combine that tries to fold (vselect cond, 0000..., x) -> (andnp cond, x). However, it does so by attempting to create an i64 vector whose element count is the condition type's bit width divided by 64 (truncating). This is bad for mask vectors like v8i1, since that division yields zero. Besides, we don't want i64 vectors anyway. For i1 vectors, switch the pattern to (and (not cond), x), which is the canonical form for `kandn` on mask registers; all other vector types keep (andnp cond, x), now built directly in the condition's vector type. Fixes JuliaLang/julia#36955. Differential Revision: https://reviews.llvm.org/D85553 (cherry picked from commit c58674d)
1 parent 973b95e commit 3e8d980

File tree

2 files changed

+69
-4
lines changed

2 files changed

+69
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+8-4
Original file line numberDiff line numberDiff line change
@@ -39588,10 +39588,14 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
3958839588

3958939589
// vselect Cond, 000..., X -> andn Cond, X
3959039590
if (TValIsAllZeros) {
39591-
MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
39592-
SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
39593-
SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
39594-
SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
39591+
SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
39592+
SDValue AndN;
39593+
// The canonical form differs for i1 vectors - x86andnp is not used
39594+
if (CondVT.getScalarType() == MVT::i1)
39595+
AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
39596+
CastRHS);
39597+
else
39598+
AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
3959539599
return DAG.getBitcast(VT, AndN);
3959639600
}
3959739601

llvm/test/CodeGen/X86/avx512-select.ll

+61
Original file line numberDiff line numberDiff line change
@@ -705,3 +705,64 @@ define void @select_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y, i1 %z) nounwi
705705
store <1 x i1> %c, <1 x i1>* %x
706706
ret void
707707
}
708+
709+
; Regression test from https://github.com/JuliaLang/julia/issues/36955
710+
define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
711+
; X86-AVX512F-LABEL: julia_issue36955:
712+
; X86-AVX512F: # %bb.0:
713+
; X86-AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
714+
; X86-AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
715+
; X86-AVX512F-NEXT: vxorpd %xmm2, %xmm2, %xmm2
716+
; X86-AVX512F-NEXT: vcmplepd %zmm2, %zmm1, %k1
717+
; X86-AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
718+
; X86-AVX512F-NEXT: korw %k0, %k1, %k0
719+
; X86-AVX512F-NEXT: kmovw %k0, %eax
720+
; X86-AVX512F-NEXT: # kill: def $al killed $al killed $eax
721+
; X86-AVX512F-NEXT: vzeroupper
722+
; X86-AVX512F-NEXT: retl
723+
;
724+
; X64-AVX512F-LABEL: julia_issue36955:
725+
; X64-AVX512F: # %bb.0:
726+
; X64-AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
727+
; X64-AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
728+
; X64-AVX512F-NEXT: vxorpd %xmm2, %xmm2, %xmm2
729+
; X64-AVX512F-NEXT: vcmplepd %zmm2, %zmm1, %k1
730+
; X64-AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
731+
; X64-AVX512F-NEXT: korw %k0, %k1, %k0
732+
; X64-AVX512F-NEXT: kmovw %k0, %eax
733+
; X64-AVX512F-NEXT: # kill: def $al killed $al killed $eax
734+
; X64-AVX512F-NEXT: vzeroupper
735+
; X64-AVX512F-NEXT: retq
736+
;
737+
; X86-AVX512BW-LABEL: julia_issue36955:
738+
; X86-AVX512BW: # %bb.0:
739+
; X86-AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
740+
; X86-AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
741+
; X86-AVX512BW-NEXT: vxorpd %xmm3, %xmm3, %xmm3
742+
; X86-AVX512BW-NEXT: vcmplepd %zmm3, %zmm1, %k1
743+
; X86-AVX512BW-NEXT: vpcmpgtw %zmm0, %zmm2, %k0 {%k1}
744+
; X86-AVX512BW-NEXT: korw %k0, %k1, %k0
745+
; X86-AVX512BW-NEXT: kmovd %k0, %eax
746+
; X86-AVX512BW-NEXT: # kill: def $al killed $al killed $eax
747+
; X86-AVX512BW-NEXT: vzeroupper
748+
; X86-AVX512BW-NEXT: retl
749+
;
750+
; X64-AVX512BW-LABEL: julia_issue36955:
751+
; X64-AVX512BW: # %bb.0:
752+
; X64-AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
753+
; X64-AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
754+
; X64-AVX512BW-NEXT: vxorpd %xmm3, %xmm3, %xmm3
755+
; X64-AVX512BW-NEXT: vcmplepd %zmm3, %zmm1, %k1
756+
; X64-AVX512BW-NEXT: vpcmpgtw %zmm0, %zmm2, %k0 {%k1}
757+
; X64-AVX512BW-NEXT: korw %k0, %k1, %k0
758+
; X64-AVX512BW-NEXT: kmovd %k0, %eax
759+
; X64-AVX512BW-NEXT: # kill: def $al killed $al killed $eax
760+
; X64-AVX512BW-NEXT: vzeroupper
761+
; X64-AVX512BW-NEXT: retq
762+
%fcmp = fcmp ugt <8 x double> %a, zeroinitializer
763+
%xor = xor <8 x i1> %fcmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
764+
%select1 = select <8 x i1> %fcmp, <8 x i1> zeroinitializer, <8 x i1> %mask
765+
%select2 = select <8 x i1> %xor, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %select1
766+
%ret = bitcast <8 x i1> %select2 to i8
767+
ret i8 %ret
768+
}

0 commit comments

Comments
 (0)