Skip to content

Commit 6299fdc

Browse files
committed
[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 1)
As reported in #141034 SelectionDAG::getNode had some unexpected behaviors when trying to create vectors with UNDEF elements. Since we treat both UNDEF and POISON as undefined (when using isUndef()) we can't just fold away INSERT_VECTOR_ELT/INSERT_SUBVECTOR based on isUndef(), as that could make the resulting vector more poisonous. Same kind of bug existed in DAGCombiner::visitINSERT_SUBVECTOR. Here are some examples: This fold was done even if vec[idx] was POISON: INSERT_VECTOR_ELT vec, UNDEF, idx -> vec This fold was done even if any of vec[idx..idx+size] was POISON: INSERT_SUBVECTOR vec, UNDEF, idx -> vec This fold was done even if the elements not extracted from vec could be POISON: sub = EXTRACT_SUBVECTOR vec, idx INSERT_SUBVECTOR UNDEF, sub, idx -> vec With this patch we avoid such folds unless we can prove that the result isn't more poisonous when eliminating the insert. This patch in itself result in some regressions. Goal is to try to deal with those regressions in follow up commits. Fixes #141034
1 parent 3287c1c commit 6299fdc

18 files changed

+866
-253
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23110,6 +23110,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2311023110
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
2311123111

2311223112
// Insert into out-of-bounds element is undefined.
23113+
// Code below relies on that we handle this special case early.
2311323114
if (IndexC && VT.isFixedLengthVector() &&
2311423115
IndexC->getZExtValue() >= VT.getVectorNumElements())
2311523116
return DAG.getUNDEF(VT);
@@ -23120,14 +23121,28 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2312023121
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
2312123122
return InVec;
2312223123

23123-
if (!IndexC) {
23124-
// If this is variable insert to undef vector, it might be better to splat:
23125-
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23126-
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23127-
return DAG.getSplat(VT, DL, InVal);
23128-
return SDValue();
23124+
// If this is variable insert to undef vector, it might be better to splat:
23125+
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23126+
if (!IndexC && InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23127+
return DAG.getSplat(VT, DL, InVal);
23128+
23129+
// Try to drop insert of UNDEF/POISON elements. This is also done in getNode,
23130+
// but we also do it as a DAG combine since for example simplifications into
23131+
// SPLAT_VECTOR/BUILD_VECTOR may turn poison elements into undef/zero etc, and
23132+
// then suddenly the InVec is guaranteed to not be poison.
23133+
if (InVal.isUndef()) {
23134+
if (IndexC && VT.isFixedLengthVector()) {
23135+
APInt EltMask = APInt::getOneBitSet(VT.getVectorNumElements(),
23136+
IndexC->getZExtValue());
23137+
if (DAG.isGuaranteedNotToBePoison(InVec, EltMask))
23138+
return InVec;
23139+
}
23140+
return DAG.getFreeze(InVec);
2312923141
}
2313023142

23143+
if (!IndexC)
23144+
return SDValue();
23145+
2313123146
if (VT.isScalableVector())
2313223147
return SDValue();
2313323148

@@ -27560,18 +27575,42 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2756027575
SDValue N2 = N->getOperand(2);
2756127576
uint64_t InsIdx = N->getConstantOperandVal(2);
2756227577

27563-
// If inserting an UNDEF, just return the original vector.
27564-
if (N1.isUndef())
27565-
return N0;
27578+
// If inserting an UNDEF, just return the original vector (unless it makes the
27579+
// result more poisonous).
27580+
if (N1.isUndef()) {
27581+
if (N1.getOpcode() == ISD::POISON)
27582+
return N0;
27583+
if (VT.isFixedLengthVector()) {
27584+
unsigned SubVecNumElts = N1.getValueType().getVectorNumElements();
27585+
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27586+
InsIdx + SubVecNumElts);
27587+
if (DAG.isGuaranteedNotToBePoison(N0, EltMask))
27588+
return N0;
27589+
}
27590+
return DAG.getFreeze(N0);
27591+
}
2756627592

27567-
// If this is an insert of an extracted vector into an undef vector, we can
27568-
// just use the input to the extract if the types match, and can simplify
27593+
// If this is an insert of an extracted vector into an undef/poison vector, we
27594+
// can just use the input to the extract if the types match, and can simplify
2756927595
// in some cases even if they don't.
2757027596
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
2757127597
N1.getOperand(1) == N2) {
27598+
EVT N1VT = N1.getValueType();
2757227599
EVT SrcVT = N1.getOperand(0).getValueType();
27573-
if (SrcVT == VT)
27574-
return N1.getOperand(0);
27600+
if (SrcVT == VT) {
27601+
// Need to ensure that result isn't more poisonous if skipping both the
27602+
// extract+insert.
27603+
if (N0.getOpcode() == ISD::POISON)
27604+
return N1.getOperand(0);
27605+
if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
27606+
unsigned SubVecNumElts = N1VT.getVectorNumElements();
27607+
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27608+
InsIdx + SubVecNumElts);
27609+
if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
27610+
return N1.getOperand(0);
27611+
} else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
27612+
return N1.getOperand(0);
27613+
}
2757527614
// TODO: To remove the zero check, need to adjust the offset to
2757627615
// a multiple of the new src type.
2757727616
if (isNullConstant(N2)) {

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7923,23 +7923,42 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
79237923
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
79247924
// for scalable vectors where we will generate appropriate code to
79257925
// deal with out-of-bounds cases correctly.
7926-
if (N3C && N1.getValueType().isFixedLengthVector() &&
7927-
N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
7926+
if (N3C && VT.isFixedLengthVector() &&
7927+
N3C->getZExtValue() >= VT.getVectorNumElements())
79287928
return getUNDEF(VT);
79297929

79307930
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
79317931
if (N3.isUndef())
79327932
return getUNDEF(VT);
79337933

7934-
// If the inserted element is an UNDEF, just use the input vector.
7935-
if (N2.isUndef())
7934+
// If inserting poison, just use the input vector.
7935+
if (N2.getOpcode() == ISD::POISON)
79367936
return N1;
79377937

7938+
// Inserting undef into undef/poison is still undef.
7939+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
7940+
return getUNDEF(VT);
7941+
7942+
// If the inserted element is an UNDEF, just use the input vector.
7943+
// But not if skipping the insert could make the result more poisonous.
7944+
if (N2.isUndef()) {
7945+
if (N3C && VT.isFixedLengthVector()) {
7946+
APInt EltMask =
7947+
APInt::getOneBitSet(VT.getVectorNumElements(), N3C->getZExtValue());
7948+
if (isGuaranteedNotToBePoison(N1, EltMask))
7949+
return N1;
7950+
} else if (isGuaranteedNotToBePoison(N1))
7951+
return N1;
7952+
}
79387953
break;
79397954
}
79407955
case ISD::INSERT_SUBVECTOR: {
7941-
// Inserting undef into undef is still undef.
7942-
if (N1.isUndef() && N2.isUndef())
7956+
// If inserting poison, just use the input vector,
7957+
if (N2.getOpcode() == ISD::POISON)
7958+
return N1;
7959+
7960+
// Inserting undef into undef/poison is still undef.
7961+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
79437962
return getUNDEF(VT);
79447963

79457964
EVT N2VT = N2.getValueType();
@@ -7968,11 +7987,37 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
79687987
if (VT == N2VT)
79697988
return N2;
79707989

7971-
// If this is an insert of an extracted vector into an undef vector, we
7972-
// can just use the input to the extract.
7990+
// If this is an insert of an extracted vector into an undef/poison vector,
7991+
// we can just use the input to the extract. But not if skipping the
7992+
// extract+insert could make the result more poisonous.
79737993
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
7974-
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
7975-
return N2.getOperand(0);
7994+
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) {
7995+
if (N1.getOpcode() == ISD::POISON)
7996+
return N2.getOperand(0);
7997+
if (VT.isFixedLengthVector() && N2VT.isFixedLengthVector()) {
7998+
unsigned LoBit = N3->getAsZExtVal();
7999+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8000+
APInt EltMask =
8001+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8002+
if (isGuaranteedNotToBePoison(N2.getOperand(0), ~EltMask))
8003+
return N2.getOperand(0);
8004+
} else if (isGuaranteedNotToBePoison(N2.getOperand(0)))
8005+
return N2.getOperand(0);
8006+
}
8007+
8008+
// If the inserted subvector is UNDEF, just use the input vector.
8009+
// But not if skipping the insert could make the result more poisonous.
8010+
if (N2.isUndef()) {
8011+
if (VT.isFixedLengthVector()) {
8012+
unsigned LoBit = N3->getAsZExtVal();
8013+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8014+
APInt EltMask =
8015+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8016+
if (isGuaranteedNotToBePoison(N1, EltMask))
8017+
return N1;
8018+
} else if (isGuaranteedNotToBePoison(N1))
8019+
return N1;
8020+
}
79768021
break;
79778022
}
79788023
case ISD::BITCAST:

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3429,8 +3429,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34293429
break;
34303430
}
34313431
case ISD::INSERT_SUBVECTOR: {
3432-
// Demand any elements from the subvector and the remainder from the src its
3433-
// inserted into.
3432+
// Demand any elements from the subvector and the remainder from the src it
3433+
// is inserted into.
34343434
SDValue Src = Op.getOperand(0);
34353435
SDValue Sub = Op.getOperand(1);
34363436
uint64_t Idx = Op.getConstantOperandVal(2);
@@ -3439,6 +3439,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34393439
APInt DemandedSrcElts = DemandedElts;
34403440
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
34413441

3442+
// If none of the sub operand elements are demanded, bypass the insert.
3443+
if (!DemandedSubElts)
3444+
return TLO.CombineTo(Op, Src);
3445+
34423446
APInt SubUndef, SubZero;
34433447
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
34443448
Depth + 1))

llvm/test/CodeGen/AArch64/arm64-build-vector.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define void @widen_f16_build_vector(ptr %addr) {
5757
; CHECK-LABEL: widen_f16_build_vector:
5858
; CHECK: // %bb.0:
5959
; CHECK-NEXT: mov w8, #13294 // =0x33ee
60-
; CHECK-NEXT: movk w8, #13294, lsl #16
61-
; CHECK-NEXT: str w8, [x0]
60+
; CHECK-NEXT: dup v0.4h, w8
61+
; CHECK-NEXT: str s0, [x0]
6262
; CHECK-NEXT: ret
6363
store <2 x half> <half 0xH33EE, half 0xH33EE>, ptr %addr, align 2
6464
ret void

llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,14 @@ define i32 @combine_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i3
9393
define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
9494
; CHECK-LABEL: combine_undef_add_8xi32:
9595
; CHECK: // %bb.0:
96-
; CHECK-NEXT: fmov s1, w0
97-
; CHECK-NEXT: movi v0.2d, #0000000000000000
98-
; CHECK-NEXT: mov v1.s[1], w1
99-
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
100-
; CHECK-NEXT: mov v1.s[2], w2
101-
; CHECK-NEXT: mov v1.s[3], w3
102-
; CHECK-NEXT: xtn v2.4h, v1.4s
103-
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
104-
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
105-
; CHECK-NEXT: mov v1.d[1], v0.d[0]
106-
; CHECK-NEXT: uaddlv s0, v1.8h
96+
; CHECK-NEXT: fmov s0, w0
97+
; CHECK-NEXT: mov v0.s[1], w1
98+
; CHECK-NEXT: mov v0.s[2], w2
99+
; CHECK-NEXT: mov v0.s[3], w3
100+
; CHECK-NEXT: uzp2 v1.8h, v0.8h, v0.8h
101+
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
102+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
103+
; CHECK-NEXT: uaddlv s0, v0.8h
107104
; CHECK-NEXT: fmov w0, s0
108105
; CHECK-NEXT: ret
109106
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0

0 commit comments

Comments
 (0)