@@ -254,8 +254,8 @@ Register SIMachineFunctionInfo::addLDSKernelId() {
 SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
     const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
     unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
-  assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
-         "Preload kernel argument allocated twice.");
+  auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(KernArgIdx);
+  assert(Inserted && "Preload kernel argument allocated twice.");
   NumUserSGPRs += PaddingSGPRs;
   // If the available register tuples are aligned with the kernarg to be
   // preloaded use that register, otherwise we need to use a set of SGPRs and
@@ -264,20 +264,22 @@ SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
   ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
   Register PreloadReg =
       TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
+  auto &Regs = It->second.Regs;
   if (PreloadReg &&
       (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
-    ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
+    Regs.push_back(PreloadReg);
     NumUserSGPRs += AllocSizeDWord;
   } else {
+    Regs.reserve(AllocSizeDWord);
     for (unsigned I = 0; I < AllocSizeDWord; ++I) {
-      ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
+      Regs.push_back(getNextUserSGPR());
       NumUserSGPRs++;
     }
   }

   // Track the actual number of SGPRs that HW will preload to.
   UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
-  return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
+  return &Regs;
 }

 void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
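For readers outside the LLVM tree, here is a minimal, self-contained sketch of the pattern this change applies: a single try_emplace replaces a count() membership check followed by repeated operator[] lookups, so the map is hashed once and the cached iterator is reused. std::unordered_map stands in for the DenseMap-backed PreloadKernArgs, and every name below is illustrative, not LLVM code.

#include <cassert>
#include <cstdio>
#include <unordered_map>
#include <vector>

// Stand-in for the per-argument info keyed by kernarg index.
struct KernArgPreloadDescriptor {
  std::vector<unsigned> Regs;
};

static std::unordered_map<int, KernArgPreloadDescriptor> PreloadKernArgs;

static std::vector<unsigned> *addPreloadedSketch(int KernArgIdx,
                                                 unsigned AllocSizeDWord) {
  // One hash lookup: try_emplace default-constructs the mapped value if
  // the key is absent and reports through 'Inserted' whether it did,
  // subsuming the old count() check and the later operator[] accesses.
  auto [It, Inserted] = PreloadKernArgs.try_emplace(KernArgIdx);
  assert(Inserted && "Preload kernel argument allocated twice.");
  (void)Inserted; // Keep release builds (NDEBUG) warning-free.

  auto &Regs = It->second.Regs; // Reuse the iterator; no re-hashing.
  Regs.reserve(AllocSizeDWord); // Mirrors the reserve added in the diff.
  for (unsigned I = 0; I < AllocSizeDWord; ++I)
    Regs.push_back(I); // Placeholder register numbers.
  return &Regs;
}

int main() {
  std::vector<unsigned> *Regs = addPreloadedSketch(/*KernArgIdx=*/0,
                                                   /*AllocSizeDWord=*/4);
  std::printf("allocated %zu regs\n", Regs->size());
}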