@@ -340,7 +340,6 @@ Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
340340
341341 auto *Int64Ty = CGF.Builder .getInt64Ty ();
342342 KernelLaunchParamsTypes.push_back (Int64Ty);
343- KernelLaunchParamsTypes.push_back (Int64Ty);
344343 KernelLaunchParamsTypes.push_back (PtrTy);
345344 KernelLaunchParamsTypes.push_back (PtrTy);
346345
@@ -352,25 +351,21 @@ Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
352351 KernelLaunchParamsTy, CharUnits::fromQuantity (16 ),
353352 " kernel_launch_params" );
354353
355- auto KernelArgsSize = CGM.getDataLayout ().getTypeAllocSize (KernelArgsTy);
356-
357- // Avoid accounting the tail padding for CUDA.
358- auto KernelArgsSizeNoTailPadding = llvm::TypeSize::getZero ();
354+ // Avoid accounting the tail padding for the kernel arguments.
355+ auto KernelArgsSize = llvm::TypeSize::getZero ();
359356 if (auto N = KernelArgsTy->getNumElements ()) {
360357 auto *SL = CGM.getDataLayout ().getStructLayout (KernelArgsTy);
361- KernelArgsSizeNoTailPadding = SL->getElementOffset (N - 1 );
362- KernelArgsSizeNoTailPadding += CGM.getDataLayout ().getTypeAllocSize (
358+ KernelArgsSize + = SL->getElementOffset (N - 1 );
359+ KernelArgsSize += CGM.getDataLayout ().getTypeAllocSize (
363360 KernelArgsTy->getElementType (N - 1 ));
364361 }
365362
366363 CGF.Builder .CreateStore (llvm::ConstantInt::get (Int64Ty, KernelArgsSize),
367364 CGF.Builder .CreateStructGEP (KernelLaunchParams, 0 ));
368- CGF.Builder .CreateStore (llvm::ConstantInt::get (Int64Ty, KernelArgsSizeNoTailPadding),
369- CGF.Builder .CreateStructGEP (KernelLaunchParams, 1 ));
370365 CGF.Builder .CreateStore (KernelArgs.emitRawPointer (CGF),
371- CGF.Builder .CreateStructGEP (KernelLaunchParams, 2 ));
366+ CGF.Builder .CreateStructGEP (KernelLaunchParams, 1 ));
372367 CGF.Builder .CreateStore (llvm::Constant::getNullValue (PtrTy),
373- CGF.Builder .CreateStructGEP (KernelLaunchParams, 3 ));
368+ CGF.Builder .CreateStructGEP (KernelLaunchParams, 2 ));
374369
375370 for (unsigned i = 0 ; i < Args.size (); ++i) {
376371 auto *ArgVal = CGF.Builder .CreateLoad (CGF.GetAddrOfLocalVar (Args[i]));
0 commit comments