-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AMDGPU][UnifyDivergentExitNodes][StructurizeCFG] Add support for callbr instruction with inline-asm #152161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/ro-i/callbr-amdgpu_1
Are you sure you want to change the base?
[AMDGPU][UnifyDivergentExitNodes][StructurizeCFG] Add support for callbr instruction with inline-asm #152161
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -480,11 +480,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { | |
} else { | ||
// Test for successors as back edge | ||
BasicBlock *BB = N->getNodeAs<BasicBlock>(); | ||
BranchInst *Term = cast<BranchInst>(BB->getTerminator()); | ||
|
||
for (BasicBlock *Succ : Term->successors()) | ||
if (Visited.count(Succ)) | ||
Loops[Succ] = BB; | ||
if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator())) | ||
for (BasicBlock *Succ : Term->successors()) | ||
if (Visited.count(Succ)) | ||
Loops[Succ] = BB; | ||
} | ||
} | ||
|
||
|
@@ -516,7 +515,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { | |
|
||
for (BasicBlock *P : predecessors(BB)) { | ||
// Ignore it if it's a branch from outside into our region entry | ||
if (!ParentRegion->contains(P)) | ||
if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator())) | ||
continue; | ||
|
||
Region *R = RI->getRegionFor(P); | ||
|
@@ -1284,13 +1283,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { | |
|
||
/// Run the transformation for each region found | ||
bool StructurizeCFG::run(Region *R, DominatorTree *DT) { | ||
if (R->isTopLevelRegion()) | ||
// CallBr and its corresponding blocks must not be modified by this pass. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is saying that if a region is headed by a |
||
if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator())) | ||
return false; | ||
|
||
this->DT = DT; | ||
|
||
Func = R->getEntry()->getParent(); | ||
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); | ||
|
||
ParentRegion = R; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s | ||
|
||
define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) { | ||
; CHECK-LABEL: callbr_inline_asm: | ||
; CHECK: ; %bb.0: | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: flat_load_dword v0, v[0:1] | ||
; CHECK-NEXT: ;;#ASMSTART | ||
; CHECK-NEXT: v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2 | ||
; CHECK-NEXT: ;;#ASMEND | ||
; CHECK-NEXT: ; %bb.1: ; %fallthrough | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: flat_store_dword v[2:3], v0 | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: s_setpc_b64 s[30:31] | ||
; CHECK-NEXT: .LBB0_2: ; Inline asm indirect target | ||
; CHECK-NEXT: ; %indirect | ||
; CHECK-NEXT: ; Label of block must be emitted | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: flat_store_dword v[4:5], v0 | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: s_setpc_b64 s[30:31] | ||
%a = load i32, ptr %src, align 4 | ||
callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect] | ||
fallthrough: | ||
store i32 %a, ptr %dst1, align 4 | ||
br label %ret | ||
indirect: | ||
store i32 %a, ptr %dst2, align 4 | ||
br label %ret | ||
ret: | ||
ret void | ||
} | ||
|
||
define void @callbr_self_loop(i1 %c) { | ||
; CHECK-LABEL: callbr_self_loop: | ||
; CHECK: ; %bb.0: | ||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; CHECK-NEXT: .LBB1_1: ; %callbr | ||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-NEXT: ;;#ASMSTART | ||
; CHECK-NEXT: ;;#ASMEND | ||
; CHECK-NEXT: s_branch .LBB1_1 | ||
; CHECK-NEXT: .LBB1_2: ; Inline asm indirect target | ||
; CHECK-NEXT: ; %callbr.target.ret | ||
; CHECK-NEXT: ; Label of block must be emitted | ||
; CHECK-NEXT: s_setpc_b64 s[30:31] | ||
br label %callbr | ||
callbr: | ||
callbr void asm "", "!i"() to label %callbr [label %ret] | ||
ret: | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Everything in this file looks like unrelated style changes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, this is actually important to support the asm label constraint "!i". This should fall through to the default case (imho) without causing
VT.getSizeInBits()
to crash.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you do this separately? You should be able to refer to labels in regular inline asm.