Skip to content

Commit d81f611

Browse files
committed
[ICP] Add a few tunings to indirect-call-promotion
Indirect-call promotion (ICP) has been adjusted with the following tunings. (1) Candidate functions can now be ICP'd even if only a declaration is present. (2) All non-cold candidate functions are now considered by ICP. Previously, only hot targets were considered. (3) If one target cannot be ICP'd, proceed with the remaining targets instead of exiting the callsite. In this patch, all tunings are disabled by default. They will be enabled in a subsequent patch.
1 parent 93af944 commit d81f611

File tree

3 files changed

+255
-41
lines changed

3 files changed

+255
-41
lines changed

llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp

Lines changed: 100 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ extern cl::opt<bool> EnableVTableProfileUse;
6363

6464
// Command line option to disable indirect-call promotion with the default as
6565
// false. This is for debug purpose.
66-
static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
67-
cl::desc("Disable indirect call promotion"));
66+
cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
67+
cl::desc("Disable indirect call promotion"));
6868

6969
// Set the cutoff value for the promotion. If the value is other than 0, we
7070
// stop the transformation once the total number of promotions equals the cutoff
@@ -80,6 +80,27 @@ static cl::opt<unsigned>
8080
ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
8181
cl::desc("Skip Callsite up to this number for this compilation"));
8282

83+
// ICP the candidate function even when only a declaration is present.
84+
static cl::opt<bool> ICPAllowDecls(
85+
"icp-allow-decls", cl::init(false), cl::Hidden,
86+
cl::desc("Promote the target candidate even when the defintion "
87+
" is not available"));
88+
89+
// ICP hot candidate functions only. When set to false, non-cold functions
90+
// (warm functions) can also be promoted.
91+
static cl::opt<bool>
92+
ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
93+
cl::desc("Promote the target candidate only if it is a "
94+
"hot function. Otherwise, warm functions can "
95+
"also be promoted"));
96+
97+
// If one target cannot be ICP'd, proceed with the remaining targets instead
98+
// of exiting the callsite
99+
static cl::opt<bool> ICPAllowCandidateSkip(
100+
"icp-allow-candidate-skip", cl::init(false), cl::Hidden,
101+
cl::desc("Continue with the remaining targets instead of exiting "
102+
"when failing in a candidate"));
103+
83104
// Set if the pass is called in LTO optimization. The difference for LTO mode
84105
// is the pass won't prefix the source module name to the internal linkage
85106
// symbols.
@@ -330,6 +351,7 @@ class IndirectCallPromoter {
330351
struct PromotionCandidate {
331352
Function *const TargetFunction;
332353
const uint64_t Count;
354+
const uint32_t Index;
333355

334356
// The following fields only exist for promotion candidates with vtable
335357
// information.
@@ -341,7 +363,8 @@ class IndirectCallPromoter {
341363
VTableGUIDCountsMap VTableGUIDAndCounts;
342364
SmallVector<Constant *> AddressPoints;
343365

344-
PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
366+
PromotionCandidate(Function *F, uint64_t C, uint32_t I)
367+
: TargetFunction(F), Count(C), Index(I) {}
345368
};
346369

347370
// Check if the indirect-call call site should be promoted. Return the number
@@ -356,12 +379,10 @@ class IndirectCallPromoter {
356379
// Promote a list of targets for one indirect-call callsite by comparing
357380
// indirect callee with functions. Return true if there are IR
358381
// transformations and false otherwise.
359-
bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
360-
ArrayRef<PromotionCandidate> Candidates,
361-
uint64_t TotalCount,
362-
ArrayRef<InstrProfValueData> ICallProfDataRef,
363-
uint32_t NumCandidates,
364-
VTableGUIDCountsMap &VTableGUIDCounts);
382+
bool tryToPromoteWithFuncCmp(
383+
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
384+
uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
385+
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
365386

366387
// Promote a list of targets for one indirect call by comparing vtables with
367388
// functions. Return true if there are IR transformations and false
@@ -394,7 +415,8 @@ class IndirectCallPromoter {
394415
Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
395416
uint64_t AddressPointOffset);
396417

397-
void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
418+
void updateFuncValueProfiles(CallBase &CB,
419+
MutableArrayRef<InstrProfValueData> VDs,
398420
uint64_t Sum, uint32_t MaxMDCount);
399421

400422
void updateVPtrValueProfiles(Instruction *VPtr,
@@ -477,14 +499,31 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
477499
// the case where the symbol is globally dead in the binary and removed by
478500
// ThinLTO.
479501
Function *TargetFunction = Symtab->getFunction(Target);
480-
if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
502+
if (TargetFunction == nullptr) {
481503
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
482504
ORE.emit([&]() {
483505
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
484506
<< "Cannot promote indirect call: target with md5sum "
485507
<< ore::NV("target md5sum", Target) << " not found";
486508
});
487-
break;
509+
if (ICPAllowCandidateSkip)
510+
continue;
511+
else
512+
break;
513+
}
514+
if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
515+
LLVM_DEBUG(
516+
dbgs() << " Not promote: target definition is not available\n");
517+
ORE.emit([&]() {
518+
return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
519+
<< "Do not promote indirect call: target with md5sum "
520+
<< ore::NV("target md5sum", Target)
521+
<< " definition not available";
522+
});
523+
if (ICPAllowCandidateSkip)
524+
continue;
525+
else
526+
break;
488527
}
489528

490529
const char *Reason = nullptr;
@@ -497,10 +536,13 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
497536
<< NV("TargetFunction", TargetFunction) << " with count of "
498537
<< NV("Count", Count) << ": " << Reason;
499538
});
500-
break;
539+
if (ICPAllowCandidateSkip)
540+
continue;
541+
else
542+
break;
501543
}
502544

503-
Ret.push_back(PromotionCandidate(TargetFunction, Count));
545+
Ret.push_back(PromotionCandidate(TargetFunction, Count, I));
504546
TotalCount -= Count;
505547
}
506548
return Ret;
@@ -642,7 +684,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
642684
// Promote indirect-call to conditional direct-call for one callsite.
643685
bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
644686
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
645-
uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
687+
uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
646688
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
647689
uint32_t NumPromoted = 0;
648690

@@ -655,6 +697,8 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
655697
NumOfPGOICallPromotion++;
656698
NumPromoted++;
657699

700+
// Zero the count so that this entry is dropped later.
701+
ICallProfDataRef[C.Index].Count = 0;
658702
if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
659703
continue;
660704

@@ -679,21 +723,33 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
679723
"Number of promoted functions should not be greater than the number "
680724
"of values in profile metadata");
681725

682-
// Update value profiles on the indirect call.
683-
updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
684-
NumCandidates);
726+
updateFuncValueProfiles(CB, ICallProfDataRef, TotalCount, NumCandidates);
685727
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
686728
return true;
687729
}
688730

689731
void IndirectCallPromoter::updateFuncValueProfiles(
690-
CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
691-
uint32_t MaxMDCount) {
732+
CallBase &CB, MutableArrayRef<InstrProfValueData> CallVDs,
733+
uint64_t TotalCount, uint32_t MaxMDCount) {
692734
// First clear the existing !prof.
693735
CB.setMetadata(LLVMContext::MD_prof, nullptr);
736+
737+
// Sort value profiles by count in descending order.
738+
llvm::stable_sort(CallVDs, [](const InstrProfValueData &LHS,
739+
const InstrProfValueData &RHS) {
740+
return LHS.Count > RHS.Count;
741+
});
742+
// Drop the <target-value, count> pair if count is zero.
743+
ArrayRef<InstrProfValueData> VDs(
744+
CallVDs.begin(),
745+
llvm::upper_bound(CallVDs, 0U,
746+
[](uint64_t Count, const InstrProfValueData &ProfData) {
747+
return ProfData.Count <= Count;
748+
}));
749+
694750
// Annotate the remaining value profiles if counter is not zero.
695751
if (TotalCount != 0)
696-
annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
752+
annotateValueSite(M, CB, VDs, TotalCount, IPVK_IndirectCallTarget,
697753
MaxMDCount);
698754
}
699755

@@ -726,7 +782,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
726782
uint64_t TotalFuncCount, uint32_t NumCandidates,
727783
MutableArrayRef<InstrProfValueData> ICallProfDataRef,
728784
VTableGUIDCountsMap &VTableGUIDCounts) {
729-
SmallVector<uint64_t, 4> PromotedFuncCount;
785+
SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
730786

731787
for (const auto &Candidate : Candidates) {
732788
for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
@@ -771,7 +827,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
771827
return Remark;
772828
});
773829

774-
PromotedFuncCount.push_back(Candidate.Count);
830+
PromotedFuncCount.push_back({Candidate.Index, Candidate.Count});
775831

776832
assert(TotalFuncCount >= Candidate.Count &&
777833
"Within one prof metadata, total count is the sum of counts from "
@@ -792,22 +848,12 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
792848
// used to load multiple virtual functions. The vtable profiles need to be
793849
// updated properly in that case (e.g, for each indirect call annotate both
794850
// type profiles and function profiles in one !prof).
795-
for (size_t I = 0; I < PromotedFuncCount.size(); I++)
796-
ICallProfDataRef[I].Count -=
797-
std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
798-
// Sort value profiles by count in descending order.
799-
llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
800-
const InstrProfValueData &RHS) {
801-
return LHS.Count > RHS.Count;
802-
});
803-
// Drop the <target-value, count> pair if count is zero.
804-
ArrayRef<InstrProfValueData> VDs(
805-
ICallProfDataRef.begin(),
806-
llvm::upper_bound(ICallProfDataRef, 0U,
807-
[](uint64_t Count, const InstrProfValueData &ProfData) {
808-
return ProfData.Count <= Count;
809-
}));
810-
updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
851+
for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
852+
uint32_t Index = PromotedFuncCount[I].first;
853+
ICallProfDataRef[Index].Count -=
854+
std::max(PromotedFuncCount[I].second, ICallProfDataRef[Index].Count);
855+
}
856+
updateFuncValueProfiles(CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
811857
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
812858
return true;
813859
}
@@ -822,9 +868,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
822868
uint64_t TotalCount;
823869
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
824870
CB, TotalCount, NumCandidates);
825-
if (!NumCandidates ||
826-
(PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
871+
if (!NumCandidates)
827872
continue;
873+
if (PSI && PSI->hasProfileSummary()) {
874+
// Don't promote cold candidates.
875+
if (PSI->isColdCount(TotalCount)) {
876+
LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
877+
<< TotalCount << "\n");
878+
continue;
879+
}
880+
// Only promote hot candidates if ICPAllowHotOnly is true.
881+
if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
882+
LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
883+
<< TotalCount << "\n");
884+
continue;
885+
}
886+
}
828887

829888
auto PromotionCandidates = getPromotionCandidatesForCallSite(
830889
*CB, ICallProfDataRef, TotalCount, NumCandidates);

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@
229229
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
230230
; RUN: -import-instr-limit=0 \
231231
; RUN: -memprof-require-definition-for-promotion \
232+
; RUN: -icp-allow-decls=false \
232233
; RUN: -enable-memprof-indirect-call-support=true \
233234
; RUN: -supports-hot-cold-new \
234235
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \

0 commit comments

Comments
 (0)