Skip to content

Commit 08dfe94

Browse files
committed
Use the same args size (without tail padding) for AMDGPU
1 parent 6e3b496 commit 08dfe94

File tree

5 files changed

+9
-21
lines changed

5 files changed

+9
-21
lines changed

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -340,7 +340,6 @@ Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
340340

341341
auto *Int64Ty = CGF.Builder.getInt64Ty();
342342
KernelLaunchParamsTypes.push_back(Int64Ty);
343-
KernelLaunchParamsTypes.push_back(Int64Ty);
344343
KernelLaunchParamsTypes.push_back(PtrTy);
345344
KernelLaunchParamsTypes.push_back(PtrTy);
346345

@@ -352,25 +351,21 @@ Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
352351
KernelLaunchParamsTy, CharUnits::fromQuantity(16),
353352
"kernel_launch_params");
354353

355-
auto KernelArgsSize = CGM.getDataLayout().getTypeAllocSize(KernelArgsTy);
356-
357-
// Avoid accounting the tail padding for CUDA.
358-
auto KernelArgsSizeNoTailPadding = llvm::TypeSize::getZero();
354+
// Avoid accounting the tail padding for the kernel arguments.
355+
auto KernelArgsSize = llvm::TypeSize::getZero();
359356
if (auto N = KernelArgsTy->getNumElements()) {
360357
auto *SL = CGM.getDataLayout().getStructLayout(KernelArgsTy);
361-
KernelArgsSizeNoTailPadding = SL->getElementOffset(N - 1);
362-
KernelArgsSizeNoTailPadding += CGM.getDataLayout().getTypeAllocSize(
358+
KernelArgsSize += SL->getElementOffset(N - 1);
359+
KernelArgsSize += CGM.getDataLayout().getTypeAllocSize(
363360
KernelArgsTy->getElementType(N - 1));
364361
}
365362

366363
CGF.Builder.CreateStore(llvm::ConstantInt::get(Int64Ty, KernelArgsSize),
367364
CGF.Builder.CreateStructGEP(KernelLaunchParams, 0));
368-
CGF.Builder.CreateStore(llvm::ConstantInt::get(Int64Ty, KernelArgsSizeNoTailPadding),
369-
CGF.Builder.CreateStructGEP(KernelLaunchParams, 1));
370365
CGF.Builder.CreateStore(KernelArgs.emitRawPointer(CGF),
371-
CGF.Builder.CreateStructGEP(KernelLaunchParams, 2));
366+
CGF.Builder.CreateStructGEP(KernelLaunchParams, 1));
372367
CGF.Builder.CreateStore(llvm::Constant::getNullValue(PtrTy),
373-
CGF.Builder.CreateStructGEP(KernelLaunchParams, 3));
368+
CGF.Builder.CreateStructGEP(KernelLaunchParams, 2));
374369

375370
for (unsigned i = 0; i < Args.size(); ++i) {
376371
auto *ArgVal = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(Args[i]));

offload/include/Shared/APITypes.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -119,10 +119,8 @@ static_assert(sizeof(KernelArgsTy) ==
119119

120120
/// Flat array of kernel launch parameters and their total size.
121121
struct KernelLaunchParamsTy {
122-
/// Size of the Data array.
122+
/// Size of the Data array without the tail padding.
123123
size_t Size = 0;
124-
/// Size of the Data array without tail padding.
125-
size_t SizeNoTailPadding = 0;
126124
/// Flat array of kernel parameters.
127125
void *Data = nullptr;
128126
/// Ptrs to the Data entries. Only strictly required for the host plugin.

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3658,11 +3658,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
36583658
KernelArgsTy &KernelArgs,
36593659
KernelLaunchParamsTy LaunchParams,
36603660
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
3661-
if (ArgsSize != LaunchParams.Size &&
3662-
ArgsSize > LaunchParams.Size + getImplicitArgsSize())
3663-
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
3664-
"invalid kernel arguments size");
3665-
36663661
AMDGPUPluginTy &AMDGPUPlugin =
36673662
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
36683663
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -573,7 +573,7 @@ KernelLaunchParamsTy GenericKernelTy::prepareArgs(
573573
}
574574

575575
size_t ArgsSize = sizeof(void *) * NumArgs;
576-
return KernelLaunchParamsTy{ArgsSize, ArgsSize, &Args[0], &Ptrs[0]};
576+
return KernelLaunchParamsTy{ArgsSize, &Args[0], &Ptrs[0]};
577577
}
578578

579579
uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1414,7 +1414,7 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
14141414

14151415
void *Config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, LaunchParams.Data,
14161416
CU_LAUNCH_PARAM_BUFFER_SIZE,
1417-
reinterpret_cast<void *>(&LaunchParams.SizeNoTailPadding),
1417+
reinterpret_cast<void *>(&LaunchParams.Size),
14181418
CU_LAUNCH_PARAM_END};
14191419

14201420
// If we are running an RPC server we want to wake up the server thread

0 commit comments

Comments
 (0)