Skip to content

Commit 66524b3

Browse files
committed
[ICP] Add a few tunings to indirect-call-promotion
Indirect-call promotion (ICP) has been adjusted with the following tunings. (1) Candidate functions can now be ICP'd even if only a declaration is present. (2) All non-cold candidate functions are now considered by ICP. Previously, only hot targets were considered. In this patch, both tunings are disabled by default. They will be enabled in a subsequent patch.
1 parent 93af944 commit 66524b3

File tree

3 files changed

+188
-3
lines changed

3 files changed

+188
-3
lines changed

llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,20 @@ static cl::opt<unsigned>
8080
ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
8181
cl::desc("Skip Callsite up to this number for this compilation"));
8282

83+
// ICP the candidate function even when only a declaration is present.
84+
static cl::opt<bool> ICPAllowDeclOnly(
85+
"icp-allow-decl-only", cl::init(false), cl::Hidden,
86+
cl::desc("Promote the target candidate even when the defintion "
87+
" is not available"));
88+
89+
// ICP hot candidate functions only. When set to false, non-cold functions
90+
// (warm functions) can also be promoted.
91+
static cl::opt<bool>
92+
ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
93+
cl::desc("Promote the target candidate only if it is a "
94+
"hot function. Otherwise, warm functions can "
95+
"also be promoted"));
96+
8397
// Set if the pass is called in LTO optimization. The difference for LTO mode
8498
// is the pass won't prefix the source module name to the internal linkage
8599
// symbols.
@@ -477,7 +491,7 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
477491
// the case where the symbol is globally dead in the binary and removed by
478492
// ThinLTO.
479493
Function *TargetFunction = Symtab->getFunction(Target);
480-
if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
494+
if (TargetFunction == nullptr) {
481495
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
482496
ORE.emit([&]() {
483497
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
@@ -486,6 +500,17 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
486500
});
487501
break;
488502
}
503+
if (!ICPAllowDeclOnly && TargetFunction->isDeclaration()) {
504+
LLVM_DEBUG(
505+
dbgs() << " Not promote: target definition is not available\n");
506+
ORE.emit([&]() {
507+
return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
508+
<< "Do not promote indirect call: target with md5sum "
509+
<< ore::NV("target md5sum", Target)
510+
<< " definition not available";
511+
});
512+
break;
513+
}
489514

490515
const char *Reason = nullptr;
491516
if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
@@ -822,9 +847,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
822847
uint64_t TotalCount;
823848
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
824849
CB, TotalCount, NumCandidates);
825-
if (!NumCandidates ||
826-
(PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
850+
if (!NumCandidates)
827851
continue;
852+
if (PSI && PSI->hasProfileSummary()) {
853+
// Don't promote cold candidates.
854+
if (PSI->isColdCount(TotalCount)) {
855+
LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
856+
<< TotalCount << "\n");
857+
continue;
858+
}
859+
// Only promote hot candidates if ICPAllowHotOnly is true.
860+
if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
861+
LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
862+
<< TotalCount << "\n");
863+
continue;
864+
}
865+
}
828866

829867
auto PromotionCandidates = getPromotionCandidatesForCallSite(
830868
*CB, ICallProfDataRef, TotalCount, NumCandidates);

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@
229229
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
230230
; RUN: -import-instr-limit=0 \
231231
; RUN: -memprof-require-definition-for-promotion \
232+
; RUN: -icp-allow-decl-only=false \
232233
; RUN: -enable-memprof-indirect-call-support=true \
233234
; RUN: -supports-hot-cold-new \
234235
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decl-only=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=REMARK1
2+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decl-only=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK2
3+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-hot-only=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=REMARK1
4+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-hot-only=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK3
5+
; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-hot-only=false -icp-allow-decl-only=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK2,REMARK3,REMARK4
6+
7+
; REMARK2: remark: <unknown>:0:0: Promote indirect call to sub with count 50000 out of 50000
8+
; REMARK1: remark: <unknown>:0:0: Promote indirect call to add with count 10000 out of 10000
9+
; REMARK3: remark: <unknown>:0:0: Promote indirect call to add with count 200 out of 400
10+
; REMARK4: remark: <unknown>:0:0: Promote indirect call to sub with count 200 out of 200
11+
12+
@math = dso_local local_unnamed_addr global ptr null, align 8
13+
14+
define dso_local i32 @add(i32 noundef %a, i32 noundef %b) !prof !34 {
15+
entry:
16+
%add = add nsw i32 %a, %b
17+
ret i32 %add
18+
}
19+
20+
define dso_local range(i32 0, 2) i32 @main() !prof !35 {
21+
entry:
22+
call void @setup(i32 noundef 0)
23+
br label %for.cond
24+
25+
for.cond:
26+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
27+
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
28+
%cmp = icmp samesign ult i32 %i.0, 50000
29+
br i1 %cmp, label %for.body, label %for.end, !prof !36
30+
31+
for.body:
32+
%0 = load ptr, ptr @math, align 8, !tbaa !37
33+
%call = call i32 %0(i32 noundef %i.0, i32 noundef %i.0), !prof !41
34+
%add = add nsw i32 %sum.0, %call
35+
%inc = add nuw nsw i32 %i.0, 1
36+
br label %for.cond, !llvm.loop !42
37+
38+
for.end:
39+
call void @setup(i32 noundef 1)
40+
br label %for.cond1
41+
42+
for.cond1:
43+
%i.1 = phi i32 [ 0, %for.end ], [ %inc7, %for.body3 ]
44+
%sum.1 = phi i32 [ %sum.0, %for.end ], [ %add5, %for.body3 ]
45+
%cmp2 = icmp samesign ult i32 %i.1, 10000
46+
br i1 %cmp2, label %for.body3, label %for.cond9, !prof !44
47+
48+
for.body3:
49+
%1 = load ptr, ptr @math, align 8, !tbaa !37
50+
%call4 = call i32 %1(i32 noundef %i.1, i32 noundef %i.1), !prof !45
51+
%add5 = add nsw i32 %sum.1, %call4
52+
%inc7 = add nuw nsw i32 %i.1, 1
53+
br label %for.cond1, !llvm.loop !46
54+
55+
for.cond9:
56+
%i.2 = phi i32 [ %inc15, %for.body11 ], [ 0, %for.cond1 ]
57+
%sum.2 = phi i32 [ %add13, %for.body11 ], [ %sum.1, %for.cond1 ]
58+
%cmp10 = icmp samesign ult i32 %i.2, 400
59+
br i1 %cmp10, label %for.body11, label %for.cond17, !prof !47
60+
61+
for.body11:
62+
call void @setup(i32 noundef %i.2)
63+
%2 = load ptr, ptr @math, align 8, !tbaa !37
64+
%call12 = call i32 %2(i32 noundef %i.2, i32 noundef %i.2), !prof !48
65+
%add13 = add nsw i32 %sum.2, %call12
66+
%inc15 = add nuw nsw i32 %i.2, 1
67+
br label %for.cond9, !llvm.loop !49
68+
69+
for.cond17:
70+
%i.3 = phi i32 [ %inc25, %for.body19 ], [ 0, %for.cond9 ]
71+
%sum.3 = phi i32 [ %add23, %for.body19 ], [ %sum.2, %for.cond9 ]
72+
%cmp18 = icmp samesign ult i32 %i.3, 400
73+
br i1 %cmp18, label %for.body19, label %for.end26, !prof !47
74+
75+
for.body19:
76+
%add.i = shl nuw nsw i32 %i.3, 1
77+
%add21 = add nsw i32 %sum.3, %add.i
78+
%call22 = call i32 @sub(i32 noundef %i.3, i32 noundef %i.3)
79+
%add23 = add nsw i32 %add21, %call22
80+
%inc25 = add nuw nsw i32 %i.3, 1
81+
br label %for.cond17, !llvm.loop !50
82+
83+
for.end26:
84+
%cmp27 = icmp slt i32 %sum.3, 11
85+
%. = zext i1 %cmp27 to i32
86+
ret i32 %.
87+
}
88+
89+
declare void @setup(i32 noundef)
90+
91+
declare i32 @sub(i32 noundef, i32 noundef)
92+
93+
!llvm.module.flags = !{!0, !1, !2, !3, !4}
94+
!llvm.ident = !{!33}
95+
96+
!0 = !{i32 1, !"wchar_size", i32 4}
97+
!1 = !{i32 8, !"PIC Level", i32 2}
98+
!2 = !{i32 7, !"PIE Level", i32 2}
99+
!3 = !{i32 7, !"uwtable", i32 2}
100+
!4 = !{i32 1, !"ProfileSummary", !5}
101+
!5 = !{!6, !7, !8, !9, !10, !11, !12, !13, !14, !15}
102+
!6 = !{!"ProfileFormat", !"InstrProf"}
103+
!7 = !{!"TotalCount", i64 122204}
104+
!8 = !{!"MaxCount", i64 50600}
105+
!9 = !{!"MaxInternalCount", i64 10000}
106+
!10 = !{!"MaxFunctionCount", i64 50600}
107+
!11 = !{!"NumCounts", i64 9}
108+
!12 = !{!"NumFunctions", i64 4}
109+
!13 = !{!"IsPartialProfile", i64 0}
110+
!14 = !{!"PartialProfileRatio", double 0.000000e+00}
111+
!15 = !{!"DetailedSummary", !16}
112+
!16 = !{!17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32}
113+
!17 = !{i32 10000, i64 50600, i32 1}
114+
!18 = !{i32 100000, i64 50600, i32 1}
115+
!19 = !{i32 200000, i64 50600, i32 1}
116+
!20 = !{i32 300000, i64 50600, i32 1}
117+
!21 = !{i32 400000, i64 50600, i32 1}
118+
!22 = !{i32 500000, i64 50000, i32 2}
119+
!23 = !{i32 600000, i64 50000, i32 2}
120+
!24 = !{i32 700000, i64 50000, i32 2}
121+
!25 = !{i32 800000, i64 50000, i32 2}
122+
!26 = !{i32 900000, i64 10200, i32 3}
123+
!27 = !{i32 950000, i64 10000, i32 4}
124+
!28 = !{i32 990000, i64 402, i32 5}
125+
!29 = !{i32 999000, i64 201, i32 8}
126+
!30 = !{i32 999900, i64 201, i32 8}
127+
!31 = !{i32 999990, i64 201, i32 8}
128+
!32 = !{i32 999999, i64 201, i32 8}
129+
!33 = !{!"clang version 22.0.0git ([email protected]:llvm/llvm-project.git ac20b28c2be26061e63dceac0915f97ece2273ac)"}
130+
!34 = !{!"function_entry_count", i64 10200}
131+
!35 = !{!"function_entry_count", i64 1}
132+
!36 = !{!"branch_weights", i32 50000, i32 1}
133+
!37 = !{!38, !38, i64 0}
134+
!38 = !{!"any pointer", !39, i64 0}
135+
!39 = !{!"omnipotent char", !40, i64 0}
136+
!40 = !{!"Simple C/C++ TBAA"}
137+
!41 = !{!"VP", i32 0, i64 50000, i64 -455885480058394486, i64 50000}
138+
!42 = distinct !{!42, !43}
139+
!43 = !{!"llvm.loop.mustprogress"}
140+
!44 = !{!"branch_weights", i32 10000, i32 1}
141+
!45 = !{!"VP", i32 0, i64 10000, i64 2232412992676883508, i64 10000}
142+
!46 = distinct !{!46, !43}
143+
!47 = !{!"branch_weights", i32 400, i32 1}
144+
!48 = !{!"VP", i32 0, i64 400, i64 2232412992676883508, i64 200, i64 -455885480058394486, i64 200}
145+
!49 = distinct !{!49, !43}
146+
!50 = distinct !{!50, !43}

0 commit comments

Comments
 (0)