@@ -63,8 +63,8 @@ extern cl::opt<bool> EnableVTableProfileUse;
63
63
64
64
// Command line option to disable indirect-call promotion with the default as
65
65
// false. This is for debugging purposes.
66
- static cl::opt<bool > DisableICP (" disable-icp" , cl::init(false ), cl::Hidden,
67
- cl::desc(" Disable indirect call promotion" ));
66
+ cl::opt<bool > DisableICP (" disable-icp" , cl::init(false ), cl::Hidden,
67
+ cl::desc(" Disable indirect call promotion" ));
68
68
69
69
// Set the cutoff value for the promotion. If the value is other than 0, we
70
70
// stop the transformation once the total number of promotions equals the cutoff
@@ -80,6 +80,27 @@ static cl::opt<unsigned>
80
80
ICPCSSkip (" icp-csskip" , cl::init(0 ), cl::Hidden,
81
81
cl::desc(" Skip Callsite up to this number for this compilation" ));
82
82
83
+ // ICP the candidate function even when only a declaration is present.
84
+ static cl::opt<bool > ICPAllowDecls (
85
+ " icp-allow-decls" , cl::init(false ), cl::Hidden,
86
+ cl::desc(" Promote the target candidate even when the defintion "
87
+ " is not available" ));
88
+
89
+ // ICP hot candidate functions only. When set to false, non-cold functions
90
+ // (warm functions) can also be promoted.
91
+ static cl::opt<bool >
92
+ ICPAllowHotOnly (" icp-allow-hot-only" , cl::init(true ), cl::Hidden,
93
+ cl::desc(" Promote the target candidate only if it is a "
94
+ " hot function. Otherwise, warm functions can "
95
+ " also be promoted" ));
96
+
97
+ // If one target cannot be ICP'd, proceed with the remaining targets instead
98
+ // of exiting the callsite
99
+ static cl::opt<bool > ICPAllowCandidateSkip (
100
+ " icp-allow-candidate-skip" , cl::init(false ), cl::Hidden,
101
+ cl::desc(" Continue with the remaining targets instead of exiting "
102
+ " when failing in a candidate" ));
103
+
83
104
// Set if the pass is called in LTO optimization. The difference for LTO mode
84
105
// is the pass won't prefix the source module name to the internal linkage
85
106
// symbols.
@@ -330,6 +351,7 @@ class IndirectCallPromoter {
330
351
struct PromotionCandidate {
331
352
Function *const TargetFunction;
332
353
const uint64_t Count;
354
+ const uint32_t Index;
333
355
334
356
// The following fields only exist for promotion candidates with vtable
335
357
// information.
@@ -341,7 +363,8 @@ class IndirectCallPromoter {
341
363
VTableGUIDCountsMap VTableGUIDAndCounts;
342
364
SmallVector<Constant *> AddressPoints;
343
365
344
- PromotionCandidate (Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
366
+ PromotionCandidate (Function *F, uint64_t C, uint32_t I)
367
+ : TargetFunction(F), Count(C), Index(I) {}
345
368
};
346
369
347
370
// Check if the indirect-call call site should be promoted. Return the number
@@ -356,12 +379,10 @@ class IndirectCallPromoter {
356
379
// Promote a list of targets for one indirect-call callsite by comparing
357
380
// indirect callee with functions. Return true if there are IR
358
381
// transformations and false otherwise.
359
- bool tryToPromoteWithFuncCmp (CallBase &CB, Instruction *VPtr,
360
- ArrayRef<PromotionCandidate> Candidates,
361
- uint64_t TotalCount,
362
- ArrayRef<InstrProfValueData> ICallProfDataRef,
363
- uint32_t NumCandidates,
364
- VTableGUIDCountsMap &VTableGUIDCounts);
382
+ bool tryToPromoteWithFuncCmp (
383
+ CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
384
+ uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
385
+ uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
365
386
366
387
// Promote a list of targets for one indirect call by comparing vtables with
367
388
// functions. Return true if there are IR transformations and false
@@ -394,7 +415,8 @@ class IndirectCallPromoter {
394
415
Constant *getOrCreateVTableAddressPointVar (GlobalVariable *GV,
395
416
uint64_t AddressPointOffset);
396
417
397
- void updateFuncValueProfiles (CallBase &CB, ArrayRef<InstrProfValueData> VDs,
418
+ void updateFuncValueProfiles (CallBase &CB,
419
+ MutableArrayRef<InstrProfValueData> VDs,
398
420
uint64_t Sum, uint32_t MaxMDCount);
399
421
400
422
void updateVPtrValueProfiles (Instruction *VPtr,
@@ -477,14 +499,31 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
477
499
// the case where the symbol is globally dead in the binary and removed by
478
500
// ThinLTO.
479
501
Function *TargetFunction = Symtab->getFunction (Target);
480
- if (TargetFunction == nullptr || TargetFunction-> isDeclaration () ) {
502
+ if (TargetFunction == nullptr ) {
481
503
LLVM_DEBUG (dbgs () << " Not promote: Cannot find the target\n " );
482
504
ORE.emit ([&]() {
483
505
return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToFindTarget" , &CB)
484
506
<< " Cannot promote indirect call: target with md5sum "
485
507
<< ore::NV (" target md5sum" , Target) << " not found" ;
486
508
});
487
- break ;
509
+ if (ICPAllowCandidateSkip)
510
+ continue ;
511
+ else
512
+ break ;
513
+ }
514
+ if (!ICPAllowDecls && TargetFunction->isDeclaration ()) {
515
+ LLVM_DEBUG (
516
+ dbgs () << " Not promote: target definition is not available\n " );
517
+ ORE.emit ([&]() {
518
+ return OptimizationRemarkMissed (DEBUG_TYPE, " NoTargetDef" , &CB)
519
+ << " Do not promote indirect call: target with md5sum "
520
+ << ore::NV (" target md5sum" , Target)
521
+ << " definition not available" ;
522
+ });
523
+ if (ICPAllowCandidateSkip)
524
+ continue ;
525
+ else
526
+ break ;
488
527
}
489
528
490
529
const char *Reason = nullptr ;
@@ -497,10 +536,13 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
497
536
<< NV (" TargetFunction" , TargetFunction) << " with count of "
498
537
<< NV (" Count" , Count) << " : " << Reason;
499
538
});
500
- break ;
539
+ if (ICPAllowCandidateSkip)
540
+ continue ;
541
+ else
542
+ break ;
501
543
}
502
544
503
- Ret.push_back (PromotionCandidate (TargetFunction, Count));
545
+ Ret.push_back (PromotionCandidate (TargetFunction, Count, I ));
504
546
TotalCount -= Count;
505
547
}
506
548
return Ret;
@@ -642,7 +684,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
642
684
// Promote indirect-call to conditional direct-call for one callsite.
643
685
bool IndirectCallPromoter::tryToPromoteWithFuncCmp (
644
686
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
645
- uint64_t TotalCount, ArrayRef <InstrProfValueData> ICallProfDataRef,
687
+ uint64_t TotalCount, MutableArrayRef <InstrProfValueData> ICallProfDataRef,
646
688
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
647
689
uint32_t NumPromoted = 0 ;
648
690
@@ -655,6 +697,8 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
655
697
NumOfPGOICallPromotion++;
656
698
NumPromoted++;
657
699
700
+ // Zero the promoted target's count; zero-count entries are dropped later.
701
+ ICallProfDataRef[C.Index ].Count = 0 ;
658
702
if (!EnableVTableProfileUse || C.VTableGUIDAndCounts .empty ())
659
703
continue ;
660
704
@@ -679,21 +723,33 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
679
723
" Number of promoted functions should not be greater than the number "
680
724
" of values in profile metadata" );
681
725
682
- // Update value profiles on the indirect call.
683
- updateFuncValueProfiles (CB, ICallProfDataRef.slice (NumPromoted), TotalCount,
684
- NumCandidates);
726
+ updateFuncValueProfiles (CB, ICallProfDataRef, TotalCount, NumCandidates);
685
727
updateVPtrValueProfiles (VPtr, VTableGUIDCounts);
686
728
return true ;
687
729
}
688
730
689
731
void IndirectCallPromoter::updateFuncValueProfiles (
690
- CallBase &CB, ArrayRef <InstrProfValueData> CallVDs, uint64_t TotalCount ,
691
- uint32_t MaxMDCount) {
732
+ CallBase &CB, MutableArrayRef <InstrProfValueData> CallVDs,
733
+ uint64_t TotalCount, uint32_t MaxMDCount) {
692
734
// First clear the existing !prof.
693
735
CB.setMetadata (LLVMContext::MD_prof, nullptr );
736
+
737
+ // Sort value profiles by count in descending order.
738
+ llvm::stable_sort (CallVDs, [](const InstrProfValueData &LHS,
739
+ const InstrProfValueData &RHS) {
740
+ return LHS.Count > RHS.Count ;
741
+ });
742
+ // Drop the <target-value, count> pair if count is zero.
743
+ ArrayRef<InstrProfValueData> VDs (
744
+ CallVDs.begin (),
745
+ llvm::upper_bound (CallVDs, 0U ,
746
+ [](uint64_t Count, const InstrProfValueData &ProfData) {
747
+ return ProfData.Count <= Count;
748
+ }));
749
+
694
750
// Annotate the remaining value profiles if counter is not zero.
695
751
if (TotalCount != 0 )
696
- annotateValueSite (M, CB, CallVDs , TotalCount, IPVK_IndirectCallTarget,
752
+ annotateValueSite (M, CB, VDs , TotalCount, IPVK_IndirectCallTarget,
697
753
MaxMDCount);
698
754
}
699
755
@@ -726,7 +782,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
726
782
uint64_t TotalFuncCount, uint32_t NumCandidates,
727
783
MutableArrayRef<InstrProfValueData> ICallProfDataRef,
728
784
VTableGUIDCountsMap &VTableGUIDCounts) {
729
- SmallVector<uint64_t , 4 > PromotedFuncCount;
785
+ SmallVector<std::pair< uint32_t , uint64_t > , 4 > PromotedFuncCount;
730
786
731
787
for (const auto &Candidate : Candidates) {
732
788
for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts )
@@ -771,7 +827,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
771
827
return Remark;
772
828
});
773
829
774
- PromotedFuncCount.push_back (Candidate.Count );
830
+ PromotedFuncCount.push_back ({ Candidate.Index , Candidate. Count } );
775
831
776
832
assert (TotalFuncCount >= Candidate.Count &&
777
833
" Within one prof metadata, total count is the sum of counts from "
@@ -792,22 +848,12 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
792
848
// used to load multiple virtual functions. The vtable profiles needs to be
793
849
// updated properly in that case (e.g, for each indirect call annotate both
794
850
// type profiles and function profiles in one !prof).
795
- for (size_t I = 0 ; I < PromotedFuncCount.size (); I++)
796
- ICallProfDataRef[I].Count -=
797
- std::max (PromotedFuncCount[I], ICallProfDataRef[I].Count );
798
- // Sort value profiles by count in descending order.
799
- llvm::stable_sort (ICallProfDataRef, [](const InstrProfValueData &LHS,
800
- const InstrProfValueData &RHS) {
801
- return LHS.Count > RHS.Count ;
802
- });
803
- // Drop the <target-value, count> pair if count is zero.
804
- ArrayRef<InstrProfValueData> VDs (
805
- ICallProfDataRef.begin (),
806
- llvm::upper_bound (ICallProfDataRef, 0U ,
807
- [](uint64_t Count, const InstrProfValueData &ProfData) {
808
- return ProfData.Count <= Count;
809
- }));
810
- updateFuncValueProfiles (CB, VDs, TotalFuncCount, NumCandidates);
851
+ for (size_t I = 0 ; I < PromotedFuncCount.size (); I++) {
852
+ uint32_t Index = PromotedFuncCount[I].first ;
853
+ ICallProfDataRef[Index].Count -=
854
+ std::max (PromotedFuncCount[I].second , ICallProfDataRef[Index].Count );
855
+ }
856
+ updateFuncValueProfiles (CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
811
857
updateVPtrValueProfiles (VPtr, VTableGUIDCounts);
812
858
return true ;
813
859
}
@@ -822,9 +868,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
822
868
uint64_t TotalCount;
823
869
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction (
824
870
CB, TotalCount, NumCandidates);
825
- if (!NumCandidates ||
826
- (PSI && PSI->hasProfileSummary () && !PSI->isHotCount (TotalCount)))
871
+ if (!NumCandidates)
827
872
continue ;
873
+ if (PSI && PSI->hasProfileSummary ()) {
874
+ // Don't promote cold candidates.
875
+ if (PSI->isColdCount (TotalCount)) {
876
+ LLVM_DEBUG (dbgs () << " Don't promote the cold candidate: TotalCount="
877
+ << TotalCount << " \n " );
878
+ continue ;
879
+ }
880
+ // When ICPAllowHotOnly is true, promote only hot candidates.
881
+ if (ICPAllowHotOnly && !PSI->isHotCount (TotalCount)) {
882
+ LLVM_DEBUG (dbgs () << " Don't promote the non-hot candidate: TotalCount="
883
+ << TotalCount << " \n " );
884
+ continue ;
885
+ }
886
+ }
828
887
829
888
auto PromotionCandidates = getPromotionCandidatesForCallSite (
830
889
*CB, ICallProfDataRef, TotalCount, NumCandidates);
0 commit comments