Skip to content

Commit 990bed6

Browse files
authored
[AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id (llvm#89612)
This provides access to the special scalar source value SRC_POPS_EXITING_WAVE_ID on GFX9 and GFX10.
1 parent 0a62a99 commit 990bed6

7 files changed

+71
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

+5
Original file line numberDiff line numberDiff line change
@@ -2482,6 +2482,11 @@ class AMDGPUGlobalLoadLDS :
24822482
"", [SDNPMemOperand]>;
24832483
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
24842484

2485+
// Takes no arguments and returns, as an i32, the special scalar source value
// SRC_POPS_EXITING_WAVE_ID (GFX9 and GFX10).
// Use read/write of inaccessible memory to model the fact that this reads a
2486+
// volatile value.
2487+
def int_amdgcn_pops_exiting_wave_id :
2488+
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
2489+
24852490
//===----------------------------------------------------------------------===//
24862491
// GFX10 Intrinsics
24872492
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -2526,6 +2526,14 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
25262526
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
25272527
}
25282528

2529+
/// Select llvm.amdgcn.pops.exiting.wave.id, which reaches here as an
/// INTRINSIC_W_CHAIN node, as an S_MOV_B32 that reads the special register
/// SRC_POPS_EXITING_WAVE_ID.
///
/// \param N the intrinsic node; it is morphed in place into the machine node.
void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
  // TODO: Select this with a tablegen pattern. This is tricky because the
  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
  // mayLoad/mayStore and tablegen complains about the mismatch.
  SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);

  // N's value list includes a chain result, so the incoming chain (operand 0)
  // must be forwarded as the final operand of the machine node. Without it the
  // selected node is no longer ordered after the operations it was chained to.
  SDValue Chain = N->getOperand(0);
  CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
}
2536+
25292537
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
25302538
switch (IntrID) {
25312539
case Intrinsic::amdgcn_ds_gws_init:
@@ -2682,6 +2690,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
26822690
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
26832691
SelectDSBvhStackIntrinsic(N);
26842692
return;
2693+
case Intrinsic::amdgcn_pops_exiting_wave_id:
2694+
SelectPOPSExitingWaveID(N);
2695+
return;
26852696
}
26862697

26872698
SelectCode(N);

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

+1
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
274274
void SelectFP_EXTEND(SDNode *N);
275275
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
276276
void SelectDSBvhStackIntrinsic(SDNode *N);
277+
void SelectPOPSExitingWaveID(SDNode *N);
277278
void SelectDS_GWS(SDNode *N, unsigned IntrID);
278279
void SelectInterpP1F16(SDNode *N);
279280
void SelectINTRINSIC_W_CHAIN(SDNode *N);

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -2079,6 +2079,21 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
20792079
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
20802080
}
20812081

2082+
/// Select llvm.amdgcn.pops.exiting.wave.id as
/// "S_MOV_B32 Dst, SRC_POPS_EXITING_WAVE_ID" and erase the original
/// intrinsic instruction.
///
/// \param MI the intrinsic instruction; operand 0 is the result register.
/// \returns the result of constraining the new instruction's register
///          operands.
bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
    MachineInstr &MI) const {
  Register Dst = MI.getOperand(0).getReg();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock *MBB = MI.getParent();

  // TODO: Select this with a tablegen pattern. This is tricky because the
  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
  // mayLoad/mayStore and tablegen complains about the mismatch.
  //
  // The special register is the *source* of the move, so it must be attached
  // as a use (addReg). Attaching it with addDef would mark it as a second
  // definition and leave S_MOV_B32 without its source operand.
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
                 .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
2096+
20822097
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
20832098
MachineInstr &I) const {
20842099
unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2129,6 +2144,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
21292144
return selectSBarrierSignalIsfirst(I, IntrinsicID);
21302145
case Intrinsic::amdgcn_s_barrier_leave:
21312146
return selectSBarrierLeave(I);
2147+
case Intrinsic::amdgcn_pops_exiting_wave_id:
2148+
return selectPOPSExitingWaveID(I);
21322149
}
21332150
return selectImpl(I, *CoverageInfo);
21342151
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

+1
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
125125
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
126126
bool selectSBarrier(MachineInstr &MI) const;
127127
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
128+
bool selectPOPSExitingWaveID(MachineInstr &MI) const;
128129

129130
bool selectImageIntrinsic(MachineInstr &MI,
130131
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -5132,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
51325132
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
51335133
break;
51345134
}
5135+
case Intrinsic::amdgcn_pops_exiting_wave_id:
5136+
return getDefaultMappingSOP(MI);
51355137
default:
51365138
return getInvalidInstructionMapping();
51375139
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
5+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
6+
7+
; Reads llvm.amdgcn.pops.exiting.wave.id and stores the result to %ptr; the
; checks below expect the read to be emitted as an s_mov_b32 from
; src_pops_exiting_wave_id.
define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
8+
; SDAG-LABEL: test:
9+
; SDAG: ; %bb.0:
10+
; SDAG-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
11+
; SDAG-NEXT: v_mov_b32_e32 v0, 0
12+
; SDAG-NEXT: v_mov_b32_e32 v1, s2
13+
; SDAG-NEXT: global_store_dword v0, v1, s[0:1]
14+
; SDAG-NEXT: s_endpgm
15+
;
16+
; GFX9-GISEL-LABEL: test:
17+
; GFX9-GISEL: ; %bb.0:
18+
; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
19+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
20+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
21+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
22+
; GFX9-GISEL-NEXT: s_endpgm
23+
;
24+
; GFX10-GISEL-LABEL: test:
25+
; GFX10-GISEL: ; %bb.0:
26+
; GFX10-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
27+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
28+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
29+
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
30+
; GFX10-GISEL-NEXT: s_endpgm
31+
%id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
32+
store i32 %id, ptr addrspace(1) %ptr
33+
ret void
34+
}

0 commit comments

Comments
 (0)