Skip to content

Commit b2ddd5c

Browse files
committed
[ICP] Add a few tunings to indirect-call-promotion
Indirect-call promotion (ICP) has been adjusted with the following tunings. (1) Candidate functions can now be ICP'd even if only a declaration is present. (2) All non-cold candidate functions are now considered by ICP. Previously, only hot targets were considered. (3) If one target cannot be ICP'd, proceed with the remaining targets instead of exiting the callsite. In this patch, all tunings are disabled by default. They will be enabled in a subsequent patch.
1 parent 93af944 commit b2ddd5c

File tree

3 files changed

+210
-7
lines changed

3 files changed

+210
-7
lines changed

llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ extern cl::opt<bool> EnableVTableProfileUse;
6363

6464
// Command line option to disable indirect-call promotion with the default as
6565
// false. This is for debug purpose.
66-
static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
67-
cl::desc("Disable indirect call promotion"));
66+
cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
67+
cl::desc("Disable indirect call promotion"));
6868

6969
// Set the cutoff value for the promotion. If the value is other than 0, we
7070
// stop the transformation once the total number of promotions equals the cutoff
@@ -80,6 +80,27 @@ static cl::opt<unsigned>
8080
ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
8181
cl::desc("Skip Callsite up to this number for this compilation"));
8282

83+
// ICP the candidate function even when only a declaration is present.
84+
static cl::opt<bool> ICPAllowDeclOnly(
85+
"icp-allow-decl-only", cl::init(false), cl::Hidden,
86+
cl::desc("Promote the target candidate even when the defintion "
87+
" is not available"));
88+
89+
// ICP hot candidate functions only. When set to false, non-cold functions
90+
// (warm functions) can also be promoted.
91+
static cl::opt<bool>
92+
ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
93+
cl::desc("Promote the target candidate only if it is a "
94+
"hot function. Otherwise, warm functions can "
95+
"also be promoted"));
96+
97+
// If one target cannot be ICP'd, proceed with the remaining targets instead
98+
// of exiting the callsite.
99+
static cl::opt<bool> ICPAllowAllocSkip(
100+
"icp-allow-skip", cl::init(false), cl::Hidden,
101+
cl::desc("Continue with the remaining targets instead of exiting "
102+
"when failing in a candidate"));
103+
83104
// Set if the pass is called in LTO optimization. The difference for LTO mode
84105
// is the pass won't prefix the source module name to the internal linkage
85106
// symbols.
@@ -477,14 +498,31 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
477498
// the case where the symbol is globally dead in the binary and removed by
478499
// ThinLTO.
479500
Function *TargetFunction = Symtab->getFunction(Target);
480-
if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
501+
if (TargetFunction == nullptr) {
481502
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
482503
ORE.emit([&]() {
483504
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
484505
<< "Cannot promote indirect call: target with md5sum "
485506
<< ore::NV("target md5sum", Target) << " not found";
486507
});
487-
break;
508+
if (ICPAllowAllocSkip)
509+
continue;
510+
else
511+
break;
512+
}
513+
if (!ICPAllowDeclOnly && TargetFunction->isDeclaration()) {
514+
LLVM_DEBUG(
515+
dbgs() << " Not promote: target definition is not available\n");
516+
ORE.emit([&]() {
517+
return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
518+
<< "Do not promote indirect call: target with md5sum "
519+
<< ore::NV("target md5sum", Target)
520+
<< " definition not available";
521+
});
522+
if (ICPAllowAllocSkip)
523+
continue;
524+
else
525+
break;
488526
}
489527

490528
const char *Reason = nullptr;
@@ -497,7 +535,10 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
497535
<< NV("TargetFunction", TargetFunction) << " with count of "
498536
<< NV("Count", Count) << ": " << Reason;
499537
});
500-
break;
538+
if (ICPAllowAllocSkip)
539+
continue;
540+
else
541+
break;
501542
}
502543

503544
Ret.push_back(PromotionCandidate(TargetFunction, Count));
@@ -822,9 +863,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
822863
uint64_t TotalCount;
823864
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
824865
CB, TotalCount, NumCandidates);
825-
if (!NumCandidates ||
826-
(PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
866+
if (!NumCandidates)
827867
continue;
868+
if (PSI && PSI->hasProfileSummary()) {
869+
// Don't promote cold candidates.
870+
if (PSI->isColdCount(TotalCount)) {
871+
LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
872+
<< TotalCount << "\n");
873+
continue;
874+
}
875+
// Only promote hot candidates if ICPAllowHotOnly is true.
876+
if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
877+
LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
878+
<< TotalCount << "\n");
879+
continue;
880+
}
881+
}
828882

829883
auto PromotionCandidates = getPromotionCandidatesForCallSite(
830884
*CB, ICallProfDataRef, TotalCount, NumCandidates);

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@
229229
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
230230
; RUN: -import-instr-limit=0 \
231231
; RUN: -memprof-require-definition-for-promotion \
232+
; RUN: -icp-allow-decl-only=false \
232233
; RUN: -enable-memprof-indirect-call-support=true \
233234
; RUN: -supports-hot-cold-new \
234235
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decl-only=false -icp-allow-hot-only=true -icp-allow-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=REMARK1
2+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decl-only=true -icp-allow-hot-only=true -icp-allow-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK2
3+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decl-only=false -icp-allow-hot-only=true -icp-allow-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=REMARK1
4+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decl-only=false -icp-allow-hot-only=false -icp-allow-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1
5+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decl-only=false -icp-allow-hot-only=false -icp-allow-skip=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK3
6+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-hot-only=false -icp-allow-decl-only=true -icp-allow-skip=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK2,REMARK4,REMARK5
7+
8+
; REMARK2: remark: <unknown>:0:0: Promote indirect call to sub with count 50000 out of 50000
9+
; REMARK1: remark: <unknown>:0:0: Promote indirect call to add with count 10000 out of 10000
10+
; REMARK3: remark: <unknown>:0:0: Promote indirect call to add with count 200 out of 400
11+
; REMARK4: remark: <unknown>:0:0: Promote indirect call to sub with count 200 out of 400
12+
; REMARK5: remark: <unknown>:0:0: Promote indirect call to add with count 200 out of 200
13+
14+
@math = dso_local local_unnamed_addr global ptr null, align 8
15+
16+
define dso_local i32 @add(i32 noundef %a, i32 noundef %b) !prof !34 {
17+
entry:
18+
%add = add nsw i32 %a, %b
19+
ret i32 %add
20+
}
21+
22+
define dso_local range(i32 0, 2) i32 @main() !prof !35 {
23+
entry:
24+
call void @setup(i32 noundef 0)
25+
br label %for.cond
26+
27+
for.cond:
28+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
29+
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
30+
%cmp = icmp samesign ult i32 %i.0, 50000
31+
br i1 %cmp, label %for.body, label %for.end, !prof !36
32+
33+
for.body:
34+
%0 = load ptr, ptr @math, align 8, !tbaa !37
35+
%call = call i32 %0(i32 noundef %i.0, i32 noundef %i.0), !prof !41
36+
%add = add nsw i32 %sum.0, %call
37+
%inc = add nuw nsw i32 %i.0, 1
38+
br label %for.cond, !llvm.loop !42
39+
40+
for.end:
41+
call void @setup(i32 noundef 1)
42+
br label %for.cond1
43+
44+
for.cond1:
45+
%i.1 = phi i32 [ 0, %for.end ], [ %inc7, %for.body3 ]
46+
%sum.1 = phi i32 [ %sum.0, %for.end ], [ %add5, %for.body3 ]
47+
%cmp2 = icmp samesign ult i32 %i.1, 10000
48+
br i1 %cmp2, label %for.body3, label %for.cond9, !prof !44
49+
50+
for.body3:
51+
%1 = load ptr, ptr @math, align 8, !tbaa !37
52+
%call4 = call i32 %1(i32 noundef %i.1, i32 noundef %i.1), !prof !45
53+
%add5 = add nsw i32 %sum.1, %call4
54+
%inc7 = add nuw nsw i32 %i.1, 1
55+
br label %for.cond1, !llvm.loop !46
56+
57+
for.cond9:
58+
%i.2 = phi i32 [ %inc15, %for.body11 ], [ 0, %for.cond1 ]
59+
%sum.2 = phi i32 [ %add13, %for.body11 ], [ %sum.1, %for.cond1 ]
60+
%cmp10 = icmp samesign ult i32 %i.2, 400
61+
br i1 %cmp10, label %for.body11, label %for.cond17, !prof !47
62+
63+
for.body11:
64+
call void @setup(i32 noundef %i.2)
65+
%2 = load ptr, ptr @math, align 8, !tbaa !37
66+
%call12 = call i32 %2(i32 noundef %i.2, i32 noundef %i.2), !prof !48
67+
%add13 = add nsw i32 %sum.2, %call12
68+
%inc15 = add nuw nsw i32 %i.2, 1
69+
br label %for.cond9, !llvm.loop !49
70+
71+
for.cond17:
72+
%i.3 = phi i32 [ %inc25, %for.body19 ], [ 0, %for.cond9 ]
73+
%sum.3 = phi i32 [ %add23, %for.body19 ], [ %sum.2, %for.cond9 ]
74+
%cmp18 = icmp samesign ult i32 %i.3, 400
75+
br i1 %cmp18, label %for.body19, label %for.end26, !prof !47
76+
77+
for.body19:
78+
%add.i = shl nuw nsw i32 %i.3, 1
79+
%add21 = add nsw i32 %sum.3, %add.i
80+
%call22 = call i32 @sub(i32 noundef %i.3, i32 noundef %i.3)
81+
%add23 = add nsw i32 %add21, %call22
82+
%inc25 = add nuw nsw i32 %i.3, 1
83+
br label %for.cond17, !llvm.loop !50
84+
85+
for.end26:
86+
%cmp27 = icmp slt i32 %sum.3, 11
87+
%. = zext i1 %cmp27 to i32
88+
ret i32 %.
89+
}
90+
91+
declare void @setup(i32 noundef)
92+
93+
declare i32 @sub(i32 noundef, i32 noundef)
94+
95+
!llvm.module.flags = !{!0, !1, !2, !3, !4}
96+
!llvm.ident = !{!33}
97+
98+
!0 = !{i32 1, !"wchar_size", i32 4}
99+
!1 = !{i32 8, !"PIC Level", i32 2}
100+
!2 = !{i32 7, !"PIE Level", i32 2}
101+
!3 = !{i32 7, !"uwtable", i32 2}
102+
!4 = !{i32 1, !"ProfileSummary", !5}
103+
!5 = !{!6, !7, !8, !9, !10, !11, !12, !13, !14, !15}
104+
!6 = !{!"ProfileFormat", !"InstrProf"}
105+
!7 = !{!"TotalCount", i64 122204}
106+
!8 = !{!"MaxCount", i64 50600}
107+
!9 = !{!"MaxInternalCount", i64 10000}
108+
!10 = !{!"MaxFunctionCount", i64 50600}
109+
!11 = !{!"NumCounts", i64 9}
110+
!12 = !{!"NumFunctions", i64 4}
111+
!13 = !{!"IsPartialProfile", i64 0}
112+
!14 = !{!"PartialProfileRatio", double 0.000000e+00}
113+
!15 = !{!"DetailedSummary", !16}
114+
!16 = !{!17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32}
115+
!17 = !{i32 10000, i64 50600, i32 1}
116+
!18 = !{i32 100000, i64 50600, i32 1}
117+
!19 = !{i32 200000, i64 50600, i32 1}
118+
!20 = !{i32 300000, i64 50600, i32 1}
119+
!21 = !{i32 400000, i64 50600, i32 1}
120+
!22 = !{i32 500000, i64 50000, i32 2}
121+
!23 = !{i32 600000, i64 50000, i32 2}
122+
!24 = !{i32 700000, i64 50000, i32 2}
123+
!25 = !{i32 800000, i64 50000, i32 2}
124+
!26 = !{i32 900000, i64 10200, i32 3}
125+
!27 = !{i32 950000, i64 10000, i32 4}
126+
!28 = !{i32 990000, i64 402, i32 5}
127+
!29 = !{i32 999000, i64 201, i32 8}
128+
!30 = !{i32 999900, i64 201, i32 8}
129+
!31 = !{i32 999990, i64 201, i32 8}
130+
!32 = !{i32 999999, i64 201, i32 8}
131+
!33 = !{!"clang version 22.0.0git ([email protected]:llvm/llvm-project.git ac20b28c2be26061e63dceac0915f97ece2273ac)"}
132+
!34 = !{!"function_entry_count", i64 10200}
133+
!35 = !{!"function_entry_count", i64 1}
134+
!36 = !{!"branch_weights", i32 50000, i32 1}
135+
!37 = !{!38, !38, i64 0}
136+
!38 = !{!"any pointer", !39, i64 0}
137+
!39 = !{!"omnipotent char", !40, i64 0}
138+
!40 = !{!"Simple C/C++ TBAA"}
139+
!41 = !{!"VP", i32 0, i64 50000, i64 -455885480058394486, i64 50000}
140+
!42 = distinct !{!42, !43}
141+
!43 = !{!"llvm.loop.mustprogress"}
142+
!44 = !{!"branch_weights", i32 10000, i32 1}
143+
!45 = !{!"VP", i32 0, i64 10000, i64 2232412992676883508, i64 10000}
144+
!46 = distinct !{!46, !43}
145+
!47 = !{!"branch_weights", i32 400, i32 1}
146+
!48 = !{!"VP", i32 0, i64 400, i64 -455885480058394486, i64 200, i64 2232412992676883508, i64 200}
147+
!49 = distinct !{!49, !43}
148+
!50 = distinct !{!50, !43}

0 commit comments

Comments
 (0)