[AMDGPU] Add alignment attr & propagate alignment through make.buffer.rsrc inst #145278

Shoreshen · 2025-06-23T06:24:52Z

This PR mainly add alignment attribute to AMDGPU backend for make.buffer.rsrc. It will do:

Calculate the alignment from the users of make.buffer.rsrc
Calculate the alignment from the op 0 of make.buffer.rsrc
Propagate through the alignment

For the following example:

define float @align_back_prop(ptr addrspace(1) align 4 %x) {
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align 8
  ret float %y
}

The PR will do:

For the users of make.buffer.rsrc, it requires the alignment of %fat.ptr is 8 (required by load)
For the op 0 of make.buffer.rsrc, it requires the alignment of %x is 4
Propagate the bigger alignment (which is 8 for %fat.ptr) through (to %x), so %x also have the alignment of 8

llvmbot · 2025-06-23T06:25:19Z

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: None (Shoreshen)

Changes

Adding alignment attribute
For make.buffer.rsrc intrinsic, propagate alignment attribute through the returned value and operand 0

Full diff: https://github.com/llvm/llvm-project/pull/145278.diff

4 Files Affected:

(modified) llvm/include/llvm/Transforms/IPO/Attributor.h (+22)
(modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+36-1)
(modified) llvm/lib/Transforms/IPO/AttributorAttributes.cpp (+12-2)
(added) llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll (+40)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index e6eb756df987d..64285c2114976 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1355,6 +1355,12 @@ struct InformationCache {
   /// Return the flat address space if the associated target has.
   LLVM_ABI std::optional<unsigned> getFlatAddressSpace() const;
 
+  virtual bool shouldTrackUse(const AbstractAttribute *QueryingAA,
+                              Value &AssociatedValue, const Use *U,
+                              const Instruction *I) const {
+    return false;
+  }
+
 private:
   struct FunctionInfo {
     LLVM_ABI ~FunctionInfo();
@@ -2042,6 +2048,19 @@ struct Attributor {
     SimplificationCallbacks[IRP].emplace_back(CB);
   }
 
+  using AlignmentCallbackTy =
+      std::function<void(const IRPosition &, const AbstractAttribute *,
+                         SmallVectorImpl<AA::ValueAndContext> &)>;
+  void registerAlignmentCallback(const IRPosition &IRP,
+                                 const AlignmentCallbackTy &CB) {
+    AlignmentCallBacks[IRP].emplace_back(CB);
+  }
+
+  SmallVector<AlignmentCallbackTy, 1>
+  getAlignmentCallback(const IRPosition &IRP) {
+    return AlignmentCallBacks.lookup(IRP);
+  }
+
   /// Return true if there is a simplification callback for \p IRP.
   bool hasSimplificationCallback(const IRPosition &IRP) {
     return SimplificationCallbacks.count(IRP);
@@ -2093,6 +2112,9 @@ struct Attributor {
   DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
       SimplificationCallbacks;
 
+  /// The vector with AAAlign callbacks registered by outside AAs.
+  DenseMap<IRPosition, SmallVector<AlignmentCallbackTy, 1>> AlignmentCallBacks;
+
   /// The vector with all simplification callbacks for global variables
   /// registered by outside AAs.
   DenseMap<const GlobalVariable *,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index f4d3a014f9921..0731dcfbcd05c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -138,6 +138,18 @@ static bool funcRequiresHostcallPtr(const Function &F) {
          F.hasFnAttribute(Attribute::SanitizeMemTag);
 }
 
+static bool isAlignAndMakeBuffer(const AbstractAttribute *AA,
+                                 const Instruction *I) {
+  if (isa<AAAlign>(AA)) {
+    if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+      if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+        return true;
+    }
+  }
+
+  return false;
+}
+
 namespace {
 class AMDGPUInformationCache : public InformationCache {
 public:
@@ -235,6 +247,12 @@ class AMDGPUInformationCache : public InformationCache {
     return ST.getMaxWavesPerEU();
   }
 
+  bool shouldTrackUse(const AbstractAttribute *QueryingAA,
+                      Value &AssociatedValue, const Use *U,
+                      const Instruction *I) const override {
+    return isAlignAndMakeBuffer(QueryingAA, I);
+  }
+
 private:
   /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
   /// local to flat. These casts may require the queue pointer.
@@ -1381,7 +1399,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
        &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
        &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAInstanceInfo::ID});
+       &AAInstanceInfo::ID, &AAAlign::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1432,6 +1450,23 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       } else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
         A.getOrCreateAAFor<AAAddressSpace>(
             IRPosition::value(*CmpX->getPointerOperand()));
+      } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc) {
+          IRPosition IRP = IRPosition::inst(*II);
+
+          Attributor::AlignmentCallbackTy ACB =
+              [](const IRPosition &IRP, const AbstractAttribute *AA,
+                 SmallVectorImpl<AA::ValueAndContext> &Values) {
+                if (auto *I = dyn_cast<Instruction>(&IRP.getAssociatedValue()))
+                  if (isAlignAndMakeBuffer(AA, I)) {
+                    Values.push_back(
+                        AA::ValueAndContext{*I->getOperand(0), nullptr});
+                  }
+              };
+          A.registerAlignmentCallback(IRP, ACB);
+
+          A.getOrCreateAAFor<AAAlign>(IRP);
+        }
       }
     }
   }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3799a696f67af..cca03b30e75c7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5202,6 +5202,10 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
       TrackUse = true;
     return 0;
   }
+  if (A.getInfoCache().shouldTrackUse(&QueryingAA, AssociatedValue, U, I)) {
+    TrackUse = true;
+    return 0;
+  }
 
   MaybeAlign MA;
   if (const auto *CB = dyn_cast<CallBase>(I)) {
@@ -5369,8 +5373,14 @@ struct AAAlignFloating : AAAlignImpl {
     bool Stripped;
     bool UsedAssumedInformation = false;
     SmallVector<AA::ValueAndContext> Values;
-    if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
-                                      AA::AnyScope, UsedAssumedInformation)) {
+    const auto &AligmentCBs = A.getAlignmentCallback(getIRPosition());
+    if (!AligmentCBs.empty()) {
+      for (const auto &CB : AligmentCBs) {
+        CB(getIRPosition(), this, Values);
+      }
+    } else if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+                                             AA::AnyScope,
+                                             UsedAssumedInformation)) {
       Values.push_back({getAssociatedValue(), getCtxI()});
       Stripped = false;
     } else {
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
new file mode 100644
index 0000000000000..85f77735bf2b6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt  -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+define float @align_back_prop(ptr addrspace(1) align 4 %x) {
+; CHECK-LABEL: define float @align_back_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    ret float [[Y]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 8
+  ret float %y
+}
+
+define float @align_foward_prop(ptr addrspace(1) align 8 %x) {
+; CHECK-LABEL: define float @align_foward_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    ret float [[Y]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 4
+  ret float %y
+}
+
+define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
+; CHECK-LABEL: define float @align_mix_prop(
+; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
+; CHECK-NEXT:    [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
+; CHECK-NEXT:    [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
+; CHECK-NEXT:    ret float [[Z]]
+;
+  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
+  %y = load float, ptr addrspace(7) %fat.ptr, align 2
+  %z = load float, ptr addrspace(1) %x, align 8
+  ret float %z
+}

krzysz00

This seems reasonable overall though i'll leave final review to folks familiar with the attributor.

One question: how much work would it be to materialize the returned alignment as a attribute on the make.buffer.rsrc call? I figure it'll help make it more obvious what's going on.

shiltian

Some fundamental issues need to be resolved first.

shiltian · 2025-06-23T15:25:13Z

llvm/include/llvm/Transforms/IPO/Attributor.h

@@ -1355,6 +1355,12 @@ struct InformationCache {
  /// Return the flat address space if the associated target has.
  LLVM_ABI std::optional<unsigned> getFlatAddressSpace() const;

+  virtual bool shouldTrackUse(const AbstractAttribute *QueryingAA,


Why do we need this?

Hi @shiltian , this is used to forward propagate (which is in initialization of each AAAlign).

When calling followUsesInMBEC if this returns true, it will put user's user into the list for checking known alignment

shiltian · 2025-06-23T15:26:42Z

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

+              [](const IRPosition &IRP, const AbstractAttribute *AA,
+                 SmallVectorImpl<AA::ValueAndContext> &Values) {
+                if (auto *I = dyn_cast<Instruction>(&IRP.getAssociatedValue()))
+                  if (isAlignAndMakeBuffer(AA, I)) {


You don't need to check whether it is AAAign here since this CB is only for align.

shiltian · 2025-06-23T15:27:35Z

llvm/include/llvm/Transforms/IPO/Attributor.h

+
+  SmallVector<AlignmentCallbackTy, 1>
+  getAlignmentCallback(const IRPosition &IRP) {
+    return AlignmentCallBacks.lookup(IRP);


What if the lookup fails? I'd prefer to do similar style as the simplification CB.

arsenm

Description should be more clear about what this is. This isn't adding anything to the intrinsic itself.

I think the only thing special about this case is it's an intrinsic that propagates a pointer operand, similar to llvm.ptrmask. It looks like AAAlign also fails to propagate the alignment (which may have been improved by the ptrmask). Can you start by adding support for llvm.ptrmask, and then generalizing the support to other pointer intrinsics?

arsenm · 2025-06-23T07:41:56Z

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

@@ -138,6 +138,18 @@ static bool funcRequiresHostcallPtr(const Function &F) {
         F.hasFnAttribute(Attribute::SanitizeMemTag);
 }

+static bool isAlignAndMakeBuffer(const AbstractAttribute *AA,
+                                 const Instruction *I) {
+  if (isa<AAAlign>(AA)) {


Probably shouldn't need to ever identify AAs like this?

arsenm · 2025-06-23T07:45:49Z

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

@@ -1381,7 +1399,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
       &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
       &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAInstanceInfo::ID});
+       &AAInstanceInfo::ID, &AAAlign::ID});


Adding this to AMDGPUAttributor should be a separate patch, you can do this in the base attributor first

agreed. you can add support for some generic LLVM intrinsics and test it.

krzysz00 · 2025-06-24T15:24:19Z

Good point, seconding "let's teach AAAlign about ptrmask"

Shoreshen · 2025-06-25T05:08:46Z

This seems reasonable overall though i'll leave final review to folks familiar with the attributor.

One question: how much work would it be to materialize the returned alignment as a attribute on the make.buffer.rsrc call? I figure it'll help make it more obvious what's going on.

Hi @krzysz00 do you mean add alignment attribute on the call to intrinsic??

Shoreshen · 2025-06-25T05:41:55Z

Description should be more clear about what this is. This isn't adding anything to the intrinsic itself.

I think the only thing special about this case is it's an intrinsic that propagates a pointer operand, similar to llvm.ptrmask. It looks like AAAlign also fails to propagate the alignment (which may have been improved by the ptrmask). Can you start by adding support for llvm.ptrmask, and then generalizing the support to other pointer intrinsics?

Hi @arsenm , to support llvm.ptrmask need to update AAAlign's ability.

Currently the only thing we can do is to tell AAAlign that the alignment of the call is the same to some operand of the call.

By my understanding it is little bit different with llvm.ptrmask, for %ret = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %p, i32 -4) it means ret = p & (-4), this need calculation based on the 2 operand......

arsenm · 2025-06-25T06:24:51Z

ptrmask shouldn't be any different than getelementptr: https://godbolt.org/z/nbe4x7qnd

It requires propagating the source attribute, and then interpreting it through the instruction to reach the return

shiltian · 2025-06-25T17:03:29Z

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

+    SmallVector<AA::ValueAndContext> Values;
+    const auto &AligmentCBs = A.getAlignmentCallback(getIRPosition());
+
+    if (!AligmentCBs.empty()) {


since lookup will do a default construct, you don't need to check emptiness here.

shiltian · 2025-06-25T17:03:38Z

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

+      for (const auto &CB : AligmentCBs) {
+        CB(getIRPosition(), this, Values);
+      }


Suggested change

for (const auto &CB : AligmentCBs) {

CB(getIRPosition(), this, Values);

}

for (const auto &CB : AligmentCBs)

CB(getIRPosition(), this, Values);

shiltian · 2025-06-25T17:04:09Z

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

@@ -5500,7 +5504,34 @@ struct AAAlignCallSiteReturned final
  using Base = AACalleeToCallSite<AAAlign, AAAlignImpl>;
  AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
      : Base(IRP, A) {}
+  ChangeStatus updateImpl(Attributor &A) override {
+    SmallVector<AA::ValueAndContext> Values;
+    const auto &AligmentCBs = A.getAlignmentCallback(getIRPosition());


shiltian · 2025-06-25T17:04:20Z

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

+
+      if (!Values.empty()) {
+        StateType T;
+        for (const auto &VAC : Values) {


shiltian · 2025-07-05T00:28:31Z

clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl

@@ -85,7 +85,7 @@ __amdgpu_buffer_rsrc_t test_amdgcn_make_buffer_p0_nullptr(short stride, int num,

 // CHECK-LABEL: @test_amdgcn_make_buffer_p1_nullptr(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call align 4294967296 ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p8.p1(ptr addrspace(1) null, i16 [[STRIDE:%.*]], i32 [[NUM:%.*]], i32 [[FLAGS:%.*]])


This value doesn't look right?

Hi @shiltian , the first operand of this is 0 pointer __builtin_amdgcn_make_buffer_rsrc((global void *)0LL, stride, num, flags);, so the attribute regard it as the maximum alignment...

shiltian · 2025-07-07T02:46:17Z

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

+      for (AA::ValueAndContext &VAC : Values) {
+        const AAAlign *AA = A.getAAFor<AAAlign>(
+            *this, IRPosition::value(*VAC.getValue()), DepClassTy::REQUIRED);
+        if (AA && this != AA) {


Not sure if that's a good idea to compare AA directly instead of comparing the associated value.

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Co-authored-by: Shilei Tian <[email protected]>

arsenm

This is still doing the target intrinsic instead of handling ptrmask first?

Shoreshen · 2025-07-07T15:07:07Z

This is still doing the target intrinsic instead of handling ptrmask first?

Hi @arsenm, I'm going to open a new PR for ptrmask. But since the backward propagate is in the initialization, it may cause some problem (depend on potential constant attr). I'll need to read the code and discuss with shilei on how to solve it~~

github-actions · 2025-07-08T09:13:04Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions cl,h,cpp -- clang/test/CodeGenOpenCL/builtins-amdgcn-make-buffer-rsrc.cl llvm/include/llvm/Transforms/IPO/Attributor.h llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp llvm/lib/Transforms/IPO/AttributorAttributes.cpp

View the diff from clang-format here.

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 297dd296a..107ab0d1c 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1364,7 +1364,6 @@ struct InformationCache {
 
   virtual unsigned getMaxAddrSpace() const { return ~0U; }
 
-
 private:
   struct FunctionInfo {
     LLVM_ABI ~FunctionInfo();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index c7d43c80c..c4b7406ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -235,7 +235,6 @@ public:
     return ST.getMaxWavesPerEU();
   }
 
-
   bool shouldTrackUse(const AbstractAttribute *QueryingAA,
                       Value &AssociatedValue, const Use *U,
                       const Instruction *I) const override {
@@ -245,90 +244,90 @@ public:
     }
     return false;
 
-  unsigned getMaxAddrSpace() const override {
-    return AMDGPUAS::MAX_AMDGPU_ADDRESS;
-  }
-
-private:
-  /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
-  /// local to flat. These casts may require the queue pointer.
-  static uint8_t visitConstExpr(const ConstantExpr *CE) {
-    uint8_t Status = NONE;
-
-    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
-      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
-      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
-        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
-      else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
-        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
+    unsigned getMaxAddrSpace() const override {
+      return AMDGPUAS::MAX_AMDGPU_ADDRESS;
     }
 
-    return Status;
-  }
+  private:
+    /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
+    /// local to flat. These casts may require the queue pointer.
+    static uint8_t visitConstExpr(const ConstantExpr *CE) {
+      uint8_t Status = NONE;
+
+      if (CE->getOpcode() == Instruction::AddrSpaceCast) {
+        unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
+        if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
+          Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
+        else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
+          Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
+      }
 
-  /// Returns the minimum amount of LDS space used by a workgroup running
-  /// function \p F.
-  static unsigned getLDSSize(const Function &F) {
-    return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
-                                           {0, UINT32_MAX}, true)
-        .first;
-  }
+      return Status;
+    }
+
+    /// Returns the minimum amount of LDS space used by a workgroup running
+    /// function \p F.
+    static unsigned getLDSSize(const Function &F) {
+      return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
+                                             {0, UINT32_MAX}, true)
+          .first;
+    }
 
-  /// Get the constant access bitmap for \p C.
-  uint8_t getConstantAccess(const Constant *C,
-                            SmallPtrSetImpl<const Constant *> &Visited) {
-    auto It = ConstantStatus.find(C);
-    if (It != ConstantStatus.end())
-      return It->second;
+    /// Get the constant access bitmap for \p C.
+    uint8_t getConstantAccess(const Constant *C,
+                              SmallPtrSetImpl<const Constant *> &Visited) {
+      auto It = ConstantStatus.find(C);
+      if (It != ConstantStatus.end())
+        return It->second;
 
-    uint8_t Result = 0;
-    if (isDSAddress(C))
-      Result = DS_GLOBAL;
+      uint8_t Result = 0;
+      if (isDSAddress(C))
+        Result = DS_GLOBAL;
 
-    if (const auto *CE = dyn_cast<ConstantExpr>(C))
-      Result |= visitConstExpr(CE);
+      if (const auto *CE = dyn_cast<ConstantExpr>(C))
+        Result |= visitConstExpr(CE);
 
-    for (const Use &U : C->operands()) {
-      const auto *OpC = dyn_cast<Constant>(U);
-      if (!OpC || !Visited.insert(OpC).second)
-        continue;
+      for (const Use &U : C->operands()) {
+        const auto *OpC = dyn_cast<Constant>(U);
+        if (!OpC || !Visited.insert(OpC).second)
+          continue;
 
-      Result |= getConstantAccess(OpC, Visited);
+        Result |= getConstantAccess(OpC, Visited);
+      }
+      return Result;
     }
-    return Result;
-  }
 
-public:
-  /// Returns true if \p Fn needs the queue pointer because of \p C.
-  bool needsQueuePtr(const Constant *C, Function &Fn) {
-    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
-    bool HasAperture = hasApertureRegs(Fn);
+  public:
+    /// Returns true if \p Fn needs the queue pointer because of \p C.
+    bool needsQueuePtr(const Constant *C, Function &Fn) {
+      bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
+      bool HasAperture = hasApertureRegs(Fn);
 
-    // No need to explore the constants.
-    if (!IsNonEntryFunc && HasAperture)
-      return false;
+      // No need to explore the constants.
+      if (!IsNonEntryFunc && HasAperture)
+        return false;
 
-    SmallPtrSet<const Constant *, 8> Visited;
-    uint8_t Access = getConstantAccess(C, Visited);
+      SmallPtrSet<const Constant *, 8> Visited;
+      uint8_t Access = getConstantAccess(C, Visited);
 
-    // We need to trap on DS globals in non-entry functions.
-    if (IsNonEntryFunc && (Access & DS_GLOBAL))
-      return true;
+      // We need to trap on DS globals in non-entry functions.
+      if (IsNonEntryFunc && (Access & DS_GLOBAL))
+        return true;
 
-    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
-  }
+      return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
+    }
 
-  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
-    SmallPtrSet<const Constant *, 8> Visited;
-    uint8_t Access = getConstantAccess(C, Visited);
-    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
-  }
+    bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
+      SmallPtrSet<const Constant *, 8> Visited;
+      uint8_t Access = getConstantAccess(C, Visited);
+      return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
+    }
 
-private:
-  /// Used to determine if the Constant needs the queue pointer.
-  DenseMap<const Constant *, uint8_t> ConstantStatus;
-  const unsigned CodeObjectVersion;
-};
+  private:
+    /// Used to determine if the Constant needs the queue pointer.
+    DenseMap<const Constant *, uint8_t> ConstantStatus;
+    const unsigned CodeObjectVersion;
+  };
 
 struct AAAMDAttributes
     : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
@@ -1465,12 +1464,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
           A.getOrCreateAAFor<AAAlign>(IRP);
         }
 
-      if (Ptr) {
-        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
-        A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
-
+        if (Ptr) {
+          A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
+          A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
+        }
       }
-    }
   }
 
   bool Changed = A.run() == ChangeStatus::CHANGED;

Add alignment attr & propagate alignment through make.buffer.rsrc inst

888df54

llvmbot added backend:AMDGPU llvm:transforms labels Jun 23, 2025

Shoreshen requested review from shiltian, arsenm and krzysz00 June 23, 2025 06:25

krzysz00 reviewed Jun 23, 2025

View reviewed changes

shiltian reviewed Jun 23, 2025

View reviewed changes

Shoreshen added 2 commits June 24, 2025 10:25

Merge branch 'main' into add-align-for-make.buffer-intrinsic

122a2f7

fix shilei's comment

598533b

arsenm reviewed Jun 24, 2025

View reviewed changes

Merge branch 'main' into add-align-for-make.buffer-intrinsic

806dfca

fix matthew's comments

ce9453b

fix test case, null ptr propagate the maximum alignment

e950014

llvmbot added the clang Clang issues not falling into any other category label Jun 25, 2025

shiltian reviewed Jun 25, 2025

View reviewed changes

Shoreshen added 6 commits June 26, 2025 10:56

fix shilei's comment

dafa6df

Merge branch 'main' into add-align-for-make.buffer-intrinsic

fb747b8

Merge branch 'main' into add-align-for-make.buffer-intrinsic

af05cac

Merge branch 'main' into add-align-for-make.buffer-intrinsic

1c9c512

Merge branch 'main' into add-align-for-make.buffer-intrinsic

9ec3058

Merge branch 'main' into add-align-for-make.buffer-intrinsic

de979f9

Merge branch 'main' into add-align-for-make.buffer-intrinsic

9d5af0c

shiltian reviewed Jul 5, 2025

View reviewed changes

Merge branch 'main' into add-align-for-make.buffer-intrinsic

e8fba9e

shiltian reviewed Jul 7, 2025

View reviewed changes

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp Outdated Show resolved Hide resolved

Update llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

401f403

Co-authored-by: Shilei Tian <[email protected]>

arsenm reviewed Jul 7, 2025

View reviewed changes

Shoreshen added 2 commits July 8, 2025 08:51

Merge branch 'main' into add-align-for-make.buffer-intrinsic

f81c4b2

Merge branch 'main' into add-align-for-make.buffer-intrinsic

2af8229

[AMDGPU] Add alignment attr & propagate alignment through make.buffer.rsrc inst #145278

Are you sure you want to change the base?

[AMDGPU] Add alignment attr & propagate alignment through make.buffer.rsrc inst #145278

Conversation

Shoreshen commented Jun 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jun 23, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

krzysz00 left a comment

Choose a reason for hiding this comment

Uh oh!

shiltian left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

shiltian Jun 25, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

krzysz00 commented Jun 24, 2025

Uh oh!

Shoreshen commented Jun 25, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Shoreshen commented Jun 25, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm commented Jun 25, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

arsenm left a comment

Choose a reason for hiding this comment

Uh oh!

Shoreshen commented Jul 7, 2025

Uh oh!

github-actions bot commented Jul 8, 2025

Uh oh!

Uh oh!

Shoreshen commented Jun 23, 2025 •

edited

Loading

llvmbot commented Jun 23, 2025 •

edited

Loading

shiltian Jun 25, 2025 •

edited

Loading

Shoreshen commented Jun 25, 2025 •

edited

Loading

Shoreshen commented Jun 25, 2025 •

edited

Loading