Skip to content

Commit ec5c4d3

Browse files
committed
[AMDGPU][SDAG] Handle ISD::PTRADD in various special cases
There are more places in SIISelLowering.cpp and AMDGPUISelDAGToDAG.cpp that check for ISD::ADD in a pointer context, but as far as I can tell those are only relevant for 32-bit pointer arithmetic (like frame indices/scratch addresses and LDS), for which we don't enable PTRADD generation yet. This change is for SWDEV-516125.
1 parent b4212e9 commit ec5c4d3

File tree

6 files changed

+105
-194
lines changed

6 files changed

+105
-194
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8389,7 +8389,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
83898389
GlobalAddressSDNode *G = nullptr;
83908390
if (Src.getOpcode() == ISD::GlobalAddress)
83918391
G = cast<GlobalAddressSDNode>(Src);
8392-
else if (Src.getOpcode() == ISD::ADD &&
8392+
else if (Src->isAnyAdd() &&
83938393
Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
83948394
Src.getOperand(1).getOpcode() == ISD::Constant) {
83958395
G = cast<GlobalAddressSDNode>(Src.getOperand(0));

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
632632
// operands on the new node are also disjoint.
633633
SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
634634
: SDNodeFlags::None);
635+
unsigned Opcode = Op.getOpcode();
636+
if (Opcode == ISD::PTRADD) {
637+
// It isn't a ptradd anymore if it doesn't operate on the entire
638+
// pointer.
639+
Opcode = ISD::ADD;
640+
}
635641
SDValue X = DAG.getNode(
636-
Op.getOpcode(), dl, SmallVT,
642+
Opcode, dl, SmallVT,
637643
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
638644
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
639645
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
@@ -2861,6 +2867,11 @@ bool TargetLowering::SimplifyDemandedBits(
28612867
return TLO.CombineTo(Op, And1);
28622868
}
28632869
[[fallthrough]];
2870+
case ISD::PTRADD:
2871+
if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2872+
break;
2873+
// PTRADD behaves like ADD if pointers are represented as integers.
2874+
[[fallthrough]];
28642875
case ISD::ADD:
28652876
case ISD::SUB: {
28662877
// Add, Sub, and Mul don't demand any bits in positions beyond that
@@ -2970,10 +2981,10 @@ bool TargetLowering::SimplifyDemandedBits(
29702981

29712982
if (Op.getOpcode() == ISD::MUL) {
29722983
Known = KnownBits::mul(KnownOp0, KnownOp1);
2973-
} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2984+
} else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
29742985
Known = KnownBits::computeForAddSub(
2975-
Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2976-
Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2986+
Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(),
2987+
KnownOp0, KnownOp1);
29772988
}
29782989
break;
29792990
}
@@ -5696,7 +5707,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
56965707
return true;
56975708
}
56985709

5699-
if (N->getOpcode() == ISD::ADD) {
5710+
if (N->isAnyAdd()) {
57005711
SDValue N1 = N->getOperand(0);
57015712
SDValue N2 = N->getOperand(1);
57025713
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,7 +1489,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
14891489
C1 = nullptr;
14901490
}
14911491

1492-
if (N0.getOpcode() == ISD::ADD) {
1492+
if (N0->isAnyAdd()) {
14931493
// (add N2, N3) -> addr64, or
14941494
// (add (add N2, N3), C1) -> addr64
14951495
SDValue N2 = N0.getOperand(0);
@@ -1951,7 +1951,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
19511951
}
19521952

19531953
// Match the variable offset.
1954-
if (Addr.getOpcode() == ISD::ADD) {
1954+
if (Addr->isAnyAdd()) {
19551955
LHS = Addr.getOperand(0);
19561956

19571957
if (!LHS->isDivergent()) {
@@ -2418,7 +2418,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDNode *N, SDValue Addr,
24182418

24192419
SDValue N0, N1;
24202420
// Extract the base and offset if possible.
2421-
if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
2421+
if (CurDAG->isBaseWithConstantOffset(Addr) || Addr->isAnyAdd()) {
24222422
N0 = Addr.getOperand(0);
24232423
N1 = Addr.getOperand(1);
24242424
} else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10757,7 +10757,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1075710757
SDValue VOffset;
1075810758
// Try to split SAddr and VOffset. Global and LDS pointers share the same
1075910759
// immediate offset, so we cannot use a regular SelectGlobalSAddr().
10760-
if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) {
10760+
if (Addr->isDivergent() && Addr->isAnyAdd()) {
1076110761
SDValue LHS = Addr.getOperand(0);
1076210762
SDValue RHS = Addr.getOperand(1);
1076310763

@@ -12306,8 +12306,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace,
1230612306

1230712307
// We only do this to handle cases where it's profitable when there are
1230812308
// multiple uses of the add, so defer to the standard combine.
12309-
if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) ||
12310-
N0->hasOneUse())
12309+
if ((!N0->isAnyAdd() && N0.getOpcode() != ISD::OR) || N0->hasOneUse())
1231112310
return SDValue();
1231212311

1231312312
const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
@@ -12346,6 +12345,8 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace,
1234612345
N->getFlags().hasNoUnsignedWrap() &&
1234712346
(N0.getOpcode() == ISD::OR || N0->getFlags().hasNoUnsignedWrap()));
1234812347

12348+
// Use ISD::ADD even if the original operation was ISD::PTRADD, since we can't
12349+
// be sure that the new left operand is a proper base pointer.
1234912350
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset, Flags);
1235012351
}
1235112352

llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll

Lines changed: 22 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,50 +5,26 @@
55
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.
66

77
define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
8-
; GFX6_PTRADD-LABEL: v_add_i32:
9-
; GFX6_PTRADD: ; %bb.0:
10-
; GFX6_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
11-
; GFX6_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12-
; GFX6_PTRADD-NEXT: s_mov_b32 s7, 0x100f000
13-
; GFX6_PTRADD-NEXT: s_mov_b32 s10, 0
14-
; GFX6_PTRADD-NEXT: s_mov_b32 s11, s7
15-
; GFX6_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
16-
; GFX6_PTRADD-NEXT: v_mov_b32_e32 v1, s3
17-
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, s2, v0
18-
; GFX6_PTRADD-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
19-
; GFX6_PTRADD-NEXT: s_mov_b32 s8, s10
20-
; GFX6_PTRADD-NEXT: s_mov_b32 s9, s10
21-
; GFX6_PTRADD-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
22-
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
23-
; GFX6_PTRADD-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
24-
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
25-
; GFX6_PTRADD-NEXT: s_mov_b32 s6, -1
26-
; GFX6_PTRADD-NEXT: s_mov_b32 s4, s0
27-
; GFX6_PTRADD-NEXT: s_mov_b32 s5, s1
28-
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, v2, v0
29-
; GFX6_PTRADD-NEXT: buffer_store_dword v0, off, s[4:7], 0
30-
; GFX6_PTRADD-NEXT: s_endpgm
31-
;
32-
; GFX6_LEGACY-LABEL: v_add_i32:
33-
; GFX6_LEGACY: ; %bb.0:
34-
; GFX6_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
35-
; GFX6_LEGACY-NEXT: s_mov_b32 s7, 0x100f000
36-
; GFX6_LEGACY-NEXT: s_mov_b32 s10, 0
37-
; GFX6_LEGACY-NEXT: s_mov_b32 s11, s7
38-
; GFX6_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
39-
; GFX6_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
40-
; GFX6_LEGACY-NEXT: s_mov_b64 s[8:9], s[2:3]
41-
; GFX6_LEGACY-NEXT: v_mov_b32_e32 v1, 0
42-
; GFX6_LEGACY-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
43-
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
44-
; GFX6_LEGACY-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
45-
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
46-
; GFX6_LEGACY-NEXT: s_mov_b32 s6, -1
47-
; GFX6_LEGACY-NEXT: s_mov_b32 s4, s0
48-
; GFX6_LEGACY-NEXT: s_mov_b32 s5, s1
49-
; GFX6_LEGACY-NEXT: v_add_i32_e32 v0, vcc, v2, v0
50-
; GFX6_LEGACY-NEXT: buffer_store_dword v0, off, s[4:7], 0
51-
; GFX6_LEGACY-NEXT: s_endpgm
8+
; GFX6-LABEL: v_add_i32:
9+
; GFX6: ; %bb.0:
10+
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
11+
; GFX6-NEXT: s_mov_b32 s7, 0x100f000
12+
; GFX6-NEXT: s_mov_b32 s10, 0
13+
; GFX6-NEXT: s_mov_b32 s11, s7
14+
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
15+
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
16+
; GFX6-NEXT: s_mov_b64 s[8:9], s[2:3]
17+
; GFX6-NEXT: v_mov_b32_e32 v1, 0
18+
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
19+
; GFX6-NEXT: s_waitcnt vmcnt(0)
20+
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
21+
; GFX6-NEXT: s_waitcnt vmcnt(0)
22+
; GFX6-NEXT: s_mov_b32 s6, -1
23+
; GFX6-NEXT: s_mov_b32 s4, s0
24+
; GFX6-NEXT: s_mov_b32 s5, s1
25+
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0
26+
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
27+
; GFX6-NEXT: s_endpgm
5228
%tid = call i32 @llvm.amdgcn.workitem.id.x()
5329
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
5430
%b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
@@ -60,4 +36,5 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in
6036
}
6137

6238
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
63-
; GFX6: {{.*}}
39+
; GFX6_LEGACY: {{.*}}
40+
; GFX6_PTRADD: {{.*}}

0 commit comments

Comments
 (0)