Skip to content

[AMDGPU] Add alignment attr & propagate alignment through make.buffer.rsrc inst #145278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_rsrc_p1_flags_constant(global voi

// CHECK-LABEL: @test_amdgcn_make_buffer_p0_nullptr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 4294967296 ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p0(ptr null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
//
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num, int flags) {
Expand All @@ -85,7 +85,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num,

// CHECK-LABEL: @test_amdgcn_make_buffer_p1_nullptr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call align 4294967296 ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This value doesn't look right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @shiltian, the first operand of this call is a null pointer (`__builtin_amdgcn_make_buffer_rsrc((global void *)0LL, stride, num, flags);`), so the attribute regards it as having the maximum alignment.

// CHECK-NEXT: ret ptr addrspace(8) [[TMP0]]
//
__amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p1_nullptr(short stride, int num, int flags) {
Expand Down
23 changes: 23 additions & 0 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1356,8 +1356,15 @@ struct InformationCache {
/// Return the flat address space if the associated target has.
LLVM_ABI std::optional<unsigned> getFlatAddressSpace() const;

virtual bool shouldTrackUse(const AbstractAttribute *QueryingAA,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @shiltian, this is used for forward propagation (which happens in the initialization of each AAAlign).

When followUsesInMBEC is called and this returns true, it puts the user's users into the list that is checked for known alignment.

Value &AssociatedValue, const Use *U,
const Instruction *I) const {
return false;
}

/// Default implementation: returns ~0U (all bits set). Declared virtual so a
/// target-specific information cache can override it.
/// NOTE(review): presumably the largest valid address-space number for the
/// target — confirm against the callers.
virtual unsigned getMaxAddrSpace() const { return ~0U; }


private:
struct FunctionInfo {
LLVM_ABI ~FunctionInfo();
Expand Down Expand Up @@ -2045,6 +2052,19 @@ struct Attributor {
SimplificationCallbacks[IRP].emplace_back(CB);
}

/// Signature of an alignment callback. It is invoked with the position being
/// queried, the querying abstract attribute, and an output vector to which
/// the callback appends the values (with context) whose alignment should be
/// consulted for that position.
using AlignmentCallbackTy = std::function<void(
    const IRPosition &, const AbstractAttribute *,
    SmallVectorImpl<AA::ValueAndContext> &)>;

/// Register \p CB as an alignment callback for \p IRP. Callbacks for the same
/// position accumulate in registration order.
void registerAlignmentCallback(const IRPosition &IRP,
                               const AlignmentCallbackTy &CB) {
  auto &CallbacksForIRP = AlignmentCallBacks[IRP];
  CallbacksForIRP.push_back(CB);
}

SmallVector<AlignmentCallbackTy, 1>
getAlignmentCallback(const IRPosition &IRP) {
return AlignmentCallBacks.lookup(IRP);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the lookup fails? I'd prefer a style similar to the simplification CB.

}

/// Return true if there is a simplification callback for \p IRP.
bool hasSimplificationCallback(const IRPosition &IRP) {
return SimplificationCallbacks.count(IRP);
Expand Down Expand Up @@ -2096,6 +2116,9 @@ struct Attributor {
DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
SimplificationCallbacks;

/// Map from an IR position to the AAAlign callbacks registered for it by
/// outside AAs (filled by registerAlignmentCallback).
DenseMap<IRPosition, SmallVector<AlignmentCallbackTy, 1>> AlignmentCallBacks;

/// The vector with all simplification callbacks for global variables
/// registered by outside AAs.
DenseMap<const GlobalVariable *,
Expand Down
34 changes: 33 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,16 @@ class AMDGPUInformationCache : public InformationCache {
return ST.getMaxWavesPerEU();
}


/// Report whether uses reached through instruction \p I should keep being
/// followed. Only calls to llvm.amdgcn.make.buffer.rsrc qualify; for those,
/// getKnownAlignForUse sets TrackUse and contributes no alignment for the use
/// itself.
///
/// \p QueryingAA, \p AssociatedValue and \p U are not inspected here.
/// \returns true iff \p I is an amdgcn_make_buffer_rsrc intrinsic call.
bool shouldTrackUse(const AbstractAttribute *QueryingAA,
                    Value &AssociatedValue, const Use *U,
                    const Instruction *I) const override {
  if (const auto *II = dyn_cast<IntrinsicInst>(I))
    return II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc;
  return false;
}

/// Overrides the base-class value with AMDGPUAS::MAX_AMDGPU_ADDRESS.
unsigned getMaxAddrSpace() const override {
return AMDGPUAS::MAX_AMDGPU_ADDRESS;
}
Expand Down Expand Up @@ -1385,7 +1395,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
&AAIndirectCallInfo::ID, &AAInstanceInfo::ID});
&AAIndirectCallInfo::ID, &AAInstanceInfo::ID, &AAAlign::ID});

AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
Expand Down Expand Up @@ -1433,10 +1443,32 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
Ptr = RMW->getPointerOperand();
else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
Ptr = CmpX->getPointerOperand();
else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
  // For llvm.amdgcn.make.buffer.rsrc, seed an AAAlign on the call result and
  // register a callback that names operand 0 (the pointer argument) as the
  // value whose alignment AAAlignCallSiteReturned::updateImpl should consult.
  if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc) {
    IRPosition IRP = IRPosition::value(*II);

    Attributor::AlignmentCallbackTy ACB =
        [](const IRPosition &IRP, const AbstractAttribute * /*QueryingAA*/,
           SmallVectorImpl<AA::ValueAndContext> &Values) {
          // The position may have no context instruction; report nothing then.
          Instruction *CtxI = IRP.getCtxI();
          if (!CtxI)
            return;
          // Re-check the intrinsic so the callback is safe even if it is ever
          // registered for a different position.
          auto *Call = dyn_cast<IntrinsicInst>(CtxI);
          if (!Call ||
              Call->getIntrinsicID() != Intrinsic::amdgcn_make_buffer_rsrc)
            return;
          Values.push_back(
              AA::ValueAndContext{*CtxI->getOperand(0), nullptr});
        };
    A.registerAlignmentCallback(IRP, ACB);

    A.getOrCreateAAFor<AAAlign>(IRP);
  }
}

if (Ptr) {
A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));

}
}
}
Expand Down
29 changes: 29 additions & 0 deletions llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5204,6 +5204,10 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
TrackUse = true;
return 0;
}
if (A.getInfoCache().shouldTrackUse(&QueryingAA, AssociatedValue, U, I)) {
TrackUse = true;
return 0;
}

MaybeAlign MA;
if (const auto *CB = dyn_cast<CallBase>(I)) {
Expand Down Expand Up @@ -5502,7 +5506,32 @@ struct AAAlignCallSiteReturned final
// Shorthand for the base class; updateImpl falls back to Base::updateImpl when
// no registered alignment callback supplies any values.
using Base = AACalleeToCallSite<AAAlign, AAAlignImpl>;
// Forwards the position and the Attributor to Base unchanged.
AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
ChangeStatus updateImpl(Attributor &A) override {
SmallVector<AA::ValueAndContext> Values;
SmallVector<Attributor::AlignmentCallbackTy, 1> AligmentCBs =
A.getAlignmentCallback(getIRPosition());

for (Attributor::AlignmentCallbackTy CB : AligmentCBs)
CB(getIRPosition(), this, Values);

if (!Values.empty()) {
StateType T;
for (AA::ValueAndContext &VAC : Values) {
const AAAlign *AA = A.getAAFor<AAAlign>(
*this, IRPosition::value(*VAC.getValue()), DepClassTy::REQUIRED);
if (AA && this != AA) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure it's a good idea to compare the AAs directly instead of comparing their associated values.

const AAAlign::StateType &DS = AA->getState();
T ^= DS;
}
if (!T.isValidState())
return indicatePessimisticFixpoint();
}

return clampStateAndIndicateChange(getState(), T);
}

return Base::updateImpl(A);
}
/// See AbstractAttribute::trackStatistics(). Records this attribute under the
/// call-site `align` statistic via STATS_DECLTRACK_CS_ATTR.
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
Expand Down
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s

; Backward propagation: %x is declared align 4 but the load through the buffer
; resource is align 8. The CHECK lines expect align 8 on both the
; make.buffer.rsrc call and the %x argument.
define float @align_back_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_back_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
%y = load float, ptr addrspace(7) %fat.ptr, align 8
ret float %y
}

; Forward propagation: %x is declared align 8 while the load through the buffer
; resource only claims align 4. The CHECK lines expect the make.buffer.rsrc
; call and the load to be raised to align 8.
define float @align_foward_prop(ptr addrspace(1) align 8 %x) {
; CHECK-LABEL: define float @align_foward_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
%y = load float, ptr addrspace(7) %fat.ptr, align 4
ret float %y
}

; Mixed case: %x is declared align 4, the load through the buffer resource
; claims align 2, and a direct load from %x claims align 8. The CHECK lines
; expect align 8 on the argument, the make.buffer.rsrc call and both loads.
define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_mix_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call align 8 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
; CHECK-NEXT: ret float [[Z]]
;
%fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
%y = load float, ptr addrspace(7) %fat.ptr, align 2
%z = load float, ptr addrspace(1) %x, align 8
ret float %z
}
Loading