Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions clang/include/clang/Basic/BuiltinsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -513,11 +513,6 @@ def __builtin_amdgcn_cvt_pk_fp8_f32 : AMDGPUBuiltin<"int(float, float, int, _Con
def __builtin_amdgcn_cvt_sr_bf8_f32 : AMDGPUBuiltin<"int(float, int, int, _Constant int)", [Const], "fp8-conversion-insts">;
def __builtin_amdgcn_cvt_sr_fp8_f32 : AMDGPUBuiltin<"int(float, int, int, _Constant int)", [Const], "fp8-conversion-insts">;

//===----------------------------------------------------------------------===//
// SYCL builtin.
//===----------------------------------------------------------------------===//
def __builtin_amdgcn_implicit_offset: AMDGPUBuiltin<"unsigned int address_space<5> *()", [Const]>;

//===----------------------------------------------------------------------===//
// GFX950 only builtins.
//===----------------------------------------------------------------------===//
Expand Down
4 changes: 0 additions & 4 deletions clang/include/clang/Basic/BuiltinsNVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,6 @@ let Attributes = [NoThrow] in {

// MISC

let Attributes = [NoThrow, Const] in {
def __builtin_ptx_implicit_offset : NVPTXBuiltin<"unsigned int *()">;
}

def __nvvm_prmt : NVPTXBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)">;
let Attributes = [NoReturn] in {
def __nvvm_exit : NVPTXBuiltin<"void()">;
Expand Down
1 change: 0 additions & 1 deletion libclc/libspirv/lib/amdgcn-amdhsa/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ math/sincos.cl
math/sinh.cl
math/tan.cl
math/tanh.cl
workitem/get_global_offset.cl
workitem/get_global_size.cl
workitem/get_local_linear_id.cl
workitem/get_local_size.cl
Expand Down
23 changes: 0 additions & 23 deletions libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_global_offset.cl

This file was deleted.

1 change: 0 additions & 1 deletion libclc/libspirv/lib/ptx-nvidiacl/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ synchronization/aw_barrier.cl
async/async_work_group_strided_copy.cl
async/wait_group_events.cl
workitem/get_global_id.cl
workitem/get_global_offset.cl
workitem/get_global_size.cl
workitem/get_group_id.cl
workitem/get_local_id.cl
Expand Down
24 changes: 0 additions & 24 deletions libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_offset.cl

This file was deleted.

4 changes: 0 additions & 4 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -4273,10 +4273,6 @@ def int_amdgcn_addrspacecast_nonnull : DefaultAttrsIntrinsic<
[IntrNoMem, IntrSpeculatable]
>;

// SYCL
def int_amdgcn_implicit_offset : ClangBuiltin<"__builtin_amdgcn_implicit_offset">,
Intrinsic<[LLVMQualPointerType<5>], [], [IntrNoMem, IntrSpeculatable]>;

/// Make it clear to the backend that this value is really dead. For instance,
/// when used as an input to a phi node, it will make it possible for the
/// backend to allocate the dead lanes for operations within the corresponding
Expand Down
6 changes: 0 additions & 6 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -2712,12 +2712,6 @@ foreach layout_a = ["row", "col"] in {
} // layout_b
} // layout_a

// SYCL
def int_nvvm_implicit_offset :
ClangBuiltin<"__builtin_ptx_implicit_offset">,
Intrinsic<[llvm_ptr_ty], [],
[IntrNoMem, IntrSpeculatable]>;

class NVVM_MMA_BLOCK_SCALE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D>
: Intrinsic<D.regs,
!listconcat(A.regs, B.regs, C.regs,
Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/SYCLLowerIR/GlobalOffset.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
/// processing twice.
llvm::DenseMap<Function *, Value *> ProcessedFunctions;
/// A type of implicit argument added to the kernel signature.
llvm::Type *KernelImplicitArgumentType = nullptr;
llvm::ArrayType *KernelImplicitArgumentType = nullptr;
/// A type used for the alloca holding the values of global offsets.
llvm::Type *ImplicitOffsetPtrType = nullptr;
/// Track newly created DISUbprograms (that are attached to cloned
/// Track newly created DISubprograms (that are attached to cloned
/// functions), for ease of mapping, use the old function's name as the key.
llvm::DenseMap<StringRef, DISubprogram *> DISubprogramMap;

Expand Down
141 changes: 26 additions & 115 deletions llvm/lib/SYCLLowerIR/GlobalOffset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/PassManager.h"
#include "llvm/SYCLLowerIR/TargetHelpers.h"
#include "llvm/TargetParser/Triple.h"
Expand Down Expand Up @@ -60,55 +57,6 @@ ModulePass *llvm::createGlobalOffsetPassLegacy() {
return new GlobalOffsetLegacy();
}

// Collect the uses of the implicit-offset intrinsic that must be removed:
// `Loads` receives every load that reads an individual offset value (found by
// walking forward from each intrinsic call through PHIs and GEPs), and
// `LoadPtrUses` receives, for each such load, the instructions computing its
// pointer operand in post-order, so a caller can erase them by iterating the
// vector backwards without breaking def-use chains.
static void collectGlobalOffsetUses(Function *ImplicitOffsetIntrinsic,
                                    SmallVectorImpl<Instruction *> &LoadPtrUses,
                                    SmallVectorImpl<Instruction *> &Loads) {
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Value *, 4> Visited;

  // Find load instructions: seed the worklist with every user of every call
  // to the intrinsic, then follow PHI/GEP chains to the loads.
  for (auto *U : ImplicitOffsetIntrinsic->users()) {
    for (auto *U2 : cast<CallInst>(U)->users()) {
      auto *I = cast<Instruction>(U2);
      WorkList.push_back(I);
      Visited.insert(I);
    }
  }
  while (!WorkList.empty()) {
    Instruction *I = WorkList.pop_back_val();
    if (isa<PHINode>(I) || isa<GetElementPtrInst>(I)) {
      for (User *U : I->users())
        if (Visited.insert(U).second)
          WorkList.push_back(cast<Instruction>(U));
    }
    if (isa<LoadInst>(I))
      Loads.push_back(I);
  }

  // For each load, find its defs by post-order walking operand uses. Calls
  // (i.e. the intrinsic itself) act as leaves of the traversal and are not
  // recorded. (Fixed typo: lambda was previously named `PostOrderTraveral`.)
  Visited.clear();
  for (auto *LI : Loads) {
    Use *OpUse0 = &LI->getOperandUse(0);
    auto PostOrderTraversal = [&](auto &Self, Use &U) -> void {
      auto *I = cast<Instruction>(U.get());
      Visited.insert(I);
      for (auto &Op : I->operands()) {
        auto *OpI = dyn_cast<Instruction>(Op.get());
        if (!OpI || isa<CallInst>(OpI))
          continue;
        if (!Visited.contains(OpI))
          Self(Self, Op);
      }
      if (!isa<CallInst>(I))
        LoadPtrUses.push_back(I);
    };
    Visited.insert(LI);
    if (!Visited.contains(OpUse0->get()))
      PostOrderTraversal(PostOrderTraversal, *OpUse0);
  }
}

static void validateKernels(Module &M, TargetHelpers::KernelCache &KCache) {
SmallVector<GlobalValue *, 4> Vec;
collectUsedGlobalVariables(M, Vec, /*CompilerUsed=*/false);
Expand All @@ -128,10 +76,9 @@ static void validateKernels(Module &M, TargetHelpers::KernelCache &KCache) {
}

void GlobalOffsetPass::createClonesAndPopulateVMap(
const TargetHelpers::KernelCache &KCache,
Function *ImplicitOffsetIntrinsic) {
const TargetHelpers::KernelCache &KCache, Function *GlobalOffsetFunc) {
std::deque<User *> WorkList;
for (auto *U : ImplicitOffsetIntrinsic->users())
for (auto *U : GlobalOffsetFunc->users())
WorkList.emplace_back(U);

while (!WorkList.empty()) {
Expand All @@ -156,8 +103,7 @@ void GlobalOffsetPass::createClonesAndPopulateVMap(
for (const auto &A : Func->args())
Arguments.push_back(A.getType());

// Add the offset argument. Must be the same type as returned by
// `llvm.{amdgcn|nvvm}.implicit.offset`.
// Add the offset argument.
Arguments.push_back(ImplicitArgumentType);

// Build the new function.
Expand Down Expand Up @@ -204,66 +150,39 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
if (!T.isNVPTX() && !T.isAMDGCN())
return PreservedAnalyses::all();

Function *ImplicitOffsetIntrinsic = M.getFunction(Intrinsic::getName(
T.isNVPTX() ? static_cast<unsigned>(Intrinsic::nvvm_implicit_offset)
: static_cast<unsigned>(Intrinsic::amdgcn_implicit_offset)));
Function *GlobalOffsetFunc =
M.getFunction("_Z27__spirv_BuiltInGlobalOffseti");

if (!ImplicitOffsetIntrinsic || ImplicitOffsetIntrinsic->use_empty())
if (!GlobalOffsetFunc || GlobalOffsetFunc->use_empty())
return PreservedAnalyses::all();

if (EnableGlobalOffset) {
// For AMD allocas and pointers have to be to CONSTANT_PRIVATE (5), NVVM is
// happy with ADDRESS_SPACE_GENERIC (0).
TargetAS = T.isNVPTX() ? 0 : 5;
/// The values for NVVM's ADDRESS_SPACE_SHARED and AMD's LOCAL_ADDRESS happen
/// to be 3; use it for the implicit argument pointer type.
KernelImplicitArgumentType =
ArrayType::get(Type::getInt32Ty(M.getContext()), 3);
ImplicitOffsetPtrType =
PointerType::get(Type::getInt32Ty(M.getContext()), TargetAS);
assert(
(ImplicitOffsetIntrinsic->getReturnType() == ImplicitOffsetPtrType) &&
"Implicit offset intrinsic does not return the expected type");
ImplicitOffsetPtrType = PointerType::get(M.getContext(), TargetAS);

TargetHelpers::KernelCache KCache;
KCache.populateKernels(M);
// Validate kernels
validateKernels(M, KCache);

createClonesAndPopulateVMap(KCache, ImplicitOffsetIntrinsic);
createClonesAndPopulateVMap(KCache, GlobalOffsetFunc);

// Add implicit parameters to all direct and indirect users of the offset
addImplicitParameterToCallers(M, ImplicitOffsetIntrinsic, nullptr, KCache);
addImplicitParameterToCallers(M, GlobalOffsetFunc, nullptr, KCache);
}
SmallVector<Instruction *, 4> Loads;
SmallVector<Instruction *, 4> PtrUses;

collectGlobalOffsetUses(ImplicitOffsetIntrinsic, PtrUses, Loads);

// Replace each use of a collected Load with a Constant 0
for (Instruction *L : Loads) {
L->replaceAllUsesWith(ConstantInt::get(L->getType(), 0));
L->eraseFromParent();
}

// Try to remove all collected Loads and their Defs from the kernel.
// PtrUses is returned by `collectGlobalOffsetUses` in topological order.
// Walk it backwards so we don't violate users.
for (auto *I : reverse(PtrUses)) {
// A Def might not be a GEP. Remove it if it has no use.
if (I->use_empty())
I->eraseFromParent();
for (auto *U : make_early_inc_range(GlobalOffsetFunc->users())) {
auto *CI = cast<CallInst>(U);
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0));
CI->eraseFromParent();
}

// Remove all collected CallInsts from the kernel.
for (auto *U : make_early_inc_range(ImplicitOffsetIntrinsic->users()))
cast<Instruction>(U)->eraseFromParent();

// Assert that all uses of `ImplicitOffsetIntrinsic` are removed and delete
// it.
assert(ImplicitOffsetIntrinsic->use_empty() &&
"Not all uses of intrinsic removed");
ImplicitOffsetIntrinsic->eraseFromParent();
GlobalOffsetFunc->eraseFromParent();

return PreservedAnalyses::none();
}
Expand Down Expand Up @@ -335,8 +254,17 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
}
CallToOld = cast<CallInst>(GlobalVMap[CallToOld]);
if (!CalleeWithImplicitParam) {
// Replace intrinsic call with parameter.
CallToOld->replaceAllUsesWith(ImplicitOffset);
// Replace __spirv_BuiltInGlobalOffset call with load from parameter.
IRBuilder<> Builder(CallToOld);
Value *Index = CallToOld->getArgOperand(0);
Value *OffsetPtr = Builder.CreateInBoundsGEP(
KernelImplicitArgumentType->getElementType(), ImplicitOffset,
{Index});
Value *Offset = Builder.CreateLoad(
KernelImplicitArgumentType->getElementType(), OffsetPtr);
Value *OffsetCast = Builder.CreateIntCast(Offset, CallToOld->getType(),
/*isSigned*/ false);
CallToOld->replaceAllUsesWith(OffsetCast);
} else {
// Build up a list of arguments to call the modified function using.
SmallVector<Value *, 8> ImplicitOffsets;
Expand Down Expand Up @@ -376,7 +304,6 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
Module &M, Function *Func, Type *ImplicitArgumentType, bool KeepOriginal,
bool IsKernel) {
FunctionType *FuncTy = Func->getFunctionType();
const AttributeList &FuncAttrs = Func->getAttributes();
ImplicitArgumentType =
ImplicitArgumentType ? ImplicitArgumentType : ImplicitOffsetPtrType;
Expand All @@ -389,8 +316,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
ArgumentAttributes.push_back(FuncAttrs.getParamAttrs(I.index()));
}

// Add the offset argument. Must be the same type as returned by
// `llvm.{amdgcn|nvvm}.implicit.offset`.
// Add the offset argument.
Arguments.push_back(ImplicitArgumentType);
ArgumentAttributes.push_back(AttributeSet());

Expand All @@ -403,7 +329,6 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
Function *NewFunc = dyn_cast<Function>(GlobalVMap[Func]);

Value *ImplicitOffset = nullptr;
bool ImplicitOffsetAllocaInserted = false;
if (KeepOriginal) {
SmallVector<ReturnInst *, 8> Returns;
CloneFunctionInto(NewFunc, Func, GlobalVMap,
Expand Down Expand Up @@ -437,7 +362,6 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
AllocByteSize);
ImplicitOffset = ImplicitOffsetAlloca;
ImplicitArgumentType = ImplicitOffset->getType();
ImplicitOffsetAllocaInserted = true;
} else {
ImplicitOffset = std::prev(NewFunc->arg_end());
}
Expand Down Expand Up @@ -467,19 +391,6 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
}
assert(ImplicitOffset && "Value of implicit offset must be set.");

// Add bitcast to match the return type of the intrinsic if needed.
if (ImplicitArgumentType != ImplicitOffsetPtrType) {
BasicBlock *EntryBlock = &NewFunc->getEntryBlock();
// Make sure bitcast is inserted after alloca, if present.
BasicBlock::iterator InsertionPt =
ImplicitOffsetAllocaInserted
? std::next(cast<AllocaInst>(ImplicitOffset)->getIterator())
: EntryBlock->getFirstInsertionPt();
IRBuilder<> Builder(EntryBlock, InsertionPt);
ImplicitOffset = Builder.CreateBitCast(
ImplicitOffset, llvm::PointerType::get(M.getContext(), TargetAS));
}

ProcessedFunctions[Func] = ImplicitOffset;
Clones.insert(NewFunc);
// Return the new function and the offset argument.
Expand Down
Loading
Loading