Skip to content

Commit 27905ee

Browse files
committed
[Attributor] Change AAExecutionDomain to check intrinsic edges
The AAExecutionDomain instance checks whether a basic block is executed by the main thread only. Currently, it only recognizes the result of the `__kmpc_target_init` call in generic-mode (non-SPMD) regions as marking the path taken by the main thread. With the new runtime, we want to be able to detect such basic blocks in SPMD mode as well. To do so, we additionally recognize a thread-ID intrinsic being compared against zero.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D109849
1 parent fec2927 commit 27905ee

File tree

2 files changed

+30
-14
lines changed

2 files changed

+30
-14
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include "llvm/IR/GlobalValue.h"
3434
#include "llvm/IR/Instruction.h"
3535
#include "llvm/IR/IntrinsicInst.h"
36+
#include "llvm/IR/IntrinsicsAMDGPU.h"
37+
#include "llvm/IR/IntrinsicsNVPTX.h"
3638
#include "llvm/InitializePasses.h"
3739
#include "llvm/Support/CommandLine.h"
3840
#include "llvm/Transforms/IPO.h"
@@ -2554,9 +2556,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
25542556
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
25552557
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
25562558

2557-
// Check if the edge into the successor block compares the __kmpc_target_init
2558-
// result with -1. If we are in non-SPMD-mode that signals only the main
2559-
// thread will execute the edge.
2559+
// Check if the edge into the successor block contains a condition that only
2560+
// lets the main thread execute it.
25602561
auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
25612562
if (!Edge || !Edge->isConditional())
25622563
return false;
@@ -2571,7 +2572,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
25712572
if (!C)
25722573
return false;
25732574

2574-
// Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
2575+
// Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
25752576
if (C->isAllOnesValue()) {
25762577
auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
25772578
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
@@ -2583,6 +2584,18 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
25832584
return IsSPMDModeCI && IsSPMDModeCI->isZero();
25842585
}
25852586

2587+
if (C->isZero()) {
2588+
// Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
2589+
if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2590+
if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
2591+
return true;
2592+
2593+
// Match: 0 == llvm.amdgcn.workitem.id.x()
2594+
if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2595+
if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
2596+
return true;
2597+
}
2598+
25862599
return false;
25872600
};
25882601

llvm/test/Transforms/OpenMP/single_threaded_execution.ll

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ define void @kernel() {
1818
%cmp = icmp eq i32 %call, -1
1919
br i1 %cmp, label %if.then, label %if.else
2020
if.then:
21-
call void @nvptx()
22-
call void @amdgcn()
2321
br label %if.end
2422
if.else:
2523
br label %if.end
@@ -31,13 +29,15 @@ if.end:
3129
; REMARKS: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible.
3230
; REMARKS-NOT: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible.
3331

34-
; CHECK-DAG: [openmp-opt] Basic block @nvptx entry is executed by a single thread.
32+
; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread.
3533
; CHECK-DAG: [openmp-opt] Basic block @nvptx if.then is executed by a single thread.
36-
; CHECK-DAG: [openmp-opt] Basic block @nvptx if.end is executed by a single thread.
34+
; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread.
3735
; Function Attrs: noinline
38-
define internal void @nvptx() {
36+
define void @nvptx() {
3937
entry:
40-
br i1 true, label %if.then, label %if.end
38+
%call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
39+
%cmp = icmp eq i32 %call, 0
40+
br i1 %cmp, label %if.then, label %if.end
4141

4242
if.then:
4343
call void @foo()
@@ -50,13 +50,15 @@ if.end:
5050
ret void
5151
}
5252

53-
; CHECK-DAG: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
53+
; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
5454
; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
55-
; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
55+
; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
5656
; Function Attrs: noinline
57-
define internal void @amdgcn() {
57+
define void @amdgcn() {
5858
entry:
59-
br i1 false, label %if.then, label %if.end
59+
%call = call i32 @llvm.amdgcn.workitem.id.x()
60+
%cmp = icmp eq i32 %call, 0
61+
br i1 %cmp, label %if.then, label %if.end
6062

6163
if.then:
6264
call void @foo()
@@ -104,6 +106,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
104106
declare void @__kmpc_kernel_prepare_parallel(i8*)
105107

106108
declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
109+
107110
declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
108111

109112
attributes #0 = { cold noinline }

0 commit comments

Comments
 (0)