Skip to content

Commit 6511090

Browse files
committed
[SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms
This PR adds a TargetLowering hook, canTransformPtrArithOutOfBounds, that targets can use to allow transformations to introduce out-of-bounds pointer arithmetic. It also moves two such transformations from the AMDGPU-specific DAG combines to the generic DAGCombiner. This is motivated by target features like AArch64's checked pointer arithmetic, CPA, which does not tolerate the introduction of out-of-bounds pointer arithmetic.
1 parent 2f27f45 commit 6511090

File tree

4 files changed

+94
-100
lines changed

4 files changed

+94
-100
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3491,6 +3491,13 @@ class LLVM_ABI TargetLoweringBase {
34913491
return false;
34923492
}
34933493

3494+
/// True if the target allows transformations of in-bounds pointer
3495+
/// arithmetic that cause out-of-bounds intermediate results.
3496+
virtual bool canTransformPtrArithOutOfBounds(const Function &F,
3497+
EVT PtrVT) const {
3498+
return false;
3499+
}
3500+
34943501
/// Does this target support complex deinterleaving
34953502
virtual bool isComplexDeinterleavingSupported() const { return false; }
34963503

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 74 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2696,59 +2696,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
26962696
if (PtrVT == IntVT && isNullConstant(N0))
26972697
return N1;
26982698

2699-
if (N0.getOpcode() != ISD::PTRADD ||
2700-
reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2701-
return SDValue();
2702-
2703-
SDValue X = N0.getOperand(0);
2704-
SDValue Y = N0.getOperand(1);
2705-
SDValue Z = N1;
2706-
bool N0OneUse = N0.hasOneUse();
2707-
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2708-
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2709-
2710-
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2711-
// * y is a constant and (ptradd x, y) has one use; or
2712-
// * y and z are both constants.
2713-
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2714-
// If both additions in the original were NUW, the new ones are as well.
2715-
SDNodeFlags Flags =
2716-
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2717-
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2718-
AddToWorklist(Add.getNode());
2719-
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2699+
if (N0.getOpcode() == ISD::PTRADD &&
2700+
!reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2701+
SDValue X = N0.getOperand(0);
2702+
SDValue Y = N0.getOperand(1);
2703+
SDValue Z = N1;
2704+
bool N0OneUse = N0.hasOneUse();
2705+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2706+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2707+
2708+
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2709+
// * y is a constant and (ptradd x, y) has one use; or
2710+
// * y and z are both constants.
2711+
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2712+
// If both additions in the original were NUW, the new ones are as well.
2713+
SDNodeFlags Flags =
2714+
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2715+
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2716+
AddToWorklist(Add.getNode());
2717+
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2718+
}
2719+
}
2720+
2721+
// The following combines can turn in-bounds pointer arithmetic out of bounds.
2722+
// That is problematic for settings like AArch64's CPA, which checks that
2723+
// intermediate results of pointer arithmetic remain in bounds. The target
2724+
// therefore needs to opt-in to enable them.
2725+
if (!TLI.canTransformPtrArithOutOfBounds(
2726+
DAG.getMachineFunction().getFunction(), PtrVT))
2727+
return SDValue();
2728+
2729+
if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
2730+
// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2731+
// global address GA and constant c, such that c can be folded into GA.
2732+
SDValue GAValue = N0.getOperand(0);
2733+
if (const GlobalAddressSDNode *GA =
2734+
dyn_cast<GlobalAddressSDNode>(GAValue)) {
2735+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2736+
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2737+
// If both additions in the original were NUW, reassociation preserves
2738+
// that.
2739+
SDNodeFlags Flags =
2740+
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2741+
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2742+
AddToWorklist(Inner.getNode());
2743+
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2744+
}
2745+
}
27202746
}
27212747

2722-
// TODO: There is another possible fold here that was proven useful.
2723-
// It would be this:
2724-
//
2725-
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2726-
// * (ptradd x, y) has one use; and
2727-
// * y is a constant; and
2728-
// * z is not a constant.
2729-
//
2730-
// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2731-
// opportunity to select more complex instructions such as SUBPT and
2732-
// MSUBPT. However, a hypothetical corner case has been found that we could
2733-
// not avoid. Consider this (pseudo-POSIX C):
2734-
//
2735-
// char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2736-
// char *p = mmap(LARGE_CONSTANT);
2737-
// char *q = foo(p, -LARGE_CONSTANT);
2738-
//
2739-
// Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2740-
// further + z takes it back to the start of the mapping, so valid,
2741-
// regardless of the address mmap gave back. However, if mmap gives you an
2742-
// address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2743-
// borrow from the high bits (with the subsequent + z carrying back into
2744-
// the high bits to give you a well-defined pointer) and thus trip
2745-
// FEAT_CPA's pointer corruption checks.
2746-
//
2747-
// We leave this fold as an opportunity for future work, addressing the
2748-
// corner case for FEAT_CPA, as well as reconciling the solution with the
2749-
// more general application of pointer arithmetic in other future targets.
2750-
// For now each architecture that wants this fold must implement it in the
2751-
// target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2748+
if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2749+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2750+
// y is not, and (add y, z) is used only once.
2751+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2752+
// z is not, and (add y, z) is used only once.
2753+
// The goal is to move constant offsets to the outermost ptradd, to create
2754+
// more opportunities to fold offsets into memory instructions.
2755+
// Together with the another combine above, this also implements
2756+
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2757+
SDValue X = N0;
2758+
SDValue Y = N1.getOperand(0);
2759+
SDValue Z = N1.getOperand(1);
2760+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2761+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2762+
2763+
// If both additions in the original were NUW, reassociation preserves that.
2764+
SDNodeFlags ReassocFlags =
2765+
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2766+
2767+
if (ZIsConstant != YIsConstant) {
2768+
if (YIsConstant)
2769+
std::swap(Y, Z);
2770+
SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2771+
AddToWorklist(Inner.getNode());
2772+
return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2773+
}
2774+
}
27522775

27532776
return SDValue();
27542777
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10882,6 +10882,11 @@ bool SITargetLowering::shouldPreservePtrArith(const Function &F,
1088210882
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
1088310883
}
1088410884

10885+
bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F,
10886+
EVT PtrVT) const {
10887+
return true;
10888+
}
10889+
1088510890
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
1088610891
// offset (the offset that is included in bounds checking and swizzling, to be
1088710892
// split between the instruction's voffset and immoffset fields) and soffset
@@ -15428,64 +15433,17 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
1542815433
return Folded;
1542915434
}
1543015435

15431-
if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
15432-
// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
15433-
// global address GA and constant c, such that c can be folded into GA.
15434-
SDValue GAValue = N0.getOperand(0);
15435-
if (const GlobalAddressSDNode *GA =
15436-
dyn_cast<GlobalAddressSDNode>(GAValue)) {
15437-
if (DCI.isBeforeLegalizeOps() && isOffsetFoldingLegal(GA)) {
15438-
// If both additions in the original were NUW, reassociation preserves
15439-
// that.
15440-
SDNodeFlags Flags =
15441-
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
15442-
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
15443-
DCI.AddToWorklist(Inner.getNode());
15444-
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
15445-
}
15446-
}
15447-
}
15448-
1544915436
if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse())
1545015437
return SDValue();
1545115438

15452-
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
15453-
// y is not, and (add y, z) is used only once.
15454-
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
15455-
// z is not, and (add y, z) is used only once.
15456-
// The goal is to move constant offsets to the outermost ptradd, to create
15457-
// more opportunities to fold offsets into memory instructions.
15458-
// Together with the generic combines in DAGCombiner.cpp, this also
15459-
// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
15460-
//
15461-
// This transform is here instead of in the general DAGCombiner as it can
15462-
// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
15463-
// AArch64's CPA.
1546415439
SDValue X = N0;
1546515440
SDValue Y = N1.getOperand(0);
1546615441
SDValue Z = N1.getOperand(1);
1546715442
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
1546815443
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
1546915444

15470-
// If both additions in the original were NUW, reassociation preserves that.
15471-
SDNodeFlags ReassocFlags =
15472-
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
15473-
15474-
if (ZIsConstant != YIsConstant) {
15475-
if (YIsConstant)
15476-
std::swap(Y, Z);
15477-
SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
15478-
DCI.AddToWorklist(Inner.getNode());
15479-
return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
15480-
}
15481-
15482-
// If one of Y and Z is constant, they have been handled above. If both were
15483-
// constant, the addition would have been folded in SelectionDAG::getNode
15484-
// already. This ensures that the generic DAG combines won't undo the
15485-
// following reassociation.
15486-
assert(!YIsConstant && !ZIsConstant);
15487-
15488-
if (!X->isDivergent() && Y->isDivergent() != Z->isDivergent()) {
15445+
if (!YIsConstant && !ZIsConstant && !X->isDivergent() &&
15446+
Y->isDivergent() != Z->isDivergent()) {
1548915447
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if x and
1549015448
// y are uniform and z isn't.
1549115449
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if x and
@@ -15496,6 +15454,9 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
1549615454
// reassociate.
1549715455
if (Y->isDivergent())
1549815456
std::swap(Y, Z);
15457+
// If both additions in the original were NUW, reassociation preserves that.
15458+
SDNodeFlags ReassocFlags =
15459+
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
1549915460
SDValue UniformInner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
1550015461
DCI.AddToWorklist(UniformInner.getNode());
1550115462
return DAG.getMemBasePlusOffset(UniformInner, Z, DL, ReassocFlags);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
265265

266266
bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;
267267

268+
bool canTransformPtrArithOutOfBounds(const Function &F,
269+
EVT PtrVT) const override;
270+
268271
private:
269272
// Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
270273
// the three offsets (voffset, soffset and instoffset) into the SDValue[3]

0 commit comments

Comments
 (0)