Skip to content

Commit e3c621c

Browse files
[ThinLTO][MemProf] Add option to override max ICP with larger number (#171652)
Adds an option -module-summary-max-indirect-edges, and wiring into the ICP logic that collects promotion candidates from VP metadata, to support a larger number of promotion candidates for use in building the ThinLTO summary. Also use this in the MemProf ThinLTO backend handling where we perform memprof ICP during cloning. The new option, essentially off by default, can be used to override the value of -icp-max-prom, which is checked internally in ICP, with a larger max value when collecting candidates from the VP metadata. For MemProf in particular, where we synthesize new VP metadata targets from allocation contexts, which may not be all that frequent, we need to be able to include a larger set of these targets in the summary in order to correctly handle indirect calls in the contexts. Otherwise we will not set up the callsite graph edges correctly.
1 parent 3eef288 commit e3c621c

File tree

6 files changed

+94
-43
lines changed

6 files changed

+94
-43
lines changed

llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,14 @@ class ICallPromotionAnalysis {
5252
/// The \p TotalCount and \p NumCandidates are set to the total profile
5353
/// count of the indirect call \p I and the number of profitable candidates
5454
/// in the given array (which is sorted in reverse order of profitability).
55+
/// The value of \p MaxNumValueData can be used to override the max set
56+
/// from the -icp-max-prom option with a larger value.
5557
///
5658
/// The returned array space is owned by this class, and overwritten on
5759
/// subsequent calls.
5860
MutableArrayRef<InstrProfValueData> getPromotionCandidatesForInstruction(
59-
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates);
61+
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates,
62+
unsigned MaxNumValueData = 0);
6063
};
6164

6265
} // end namespace llvm

llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,14 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
9393

9494
MutableArrayRef<InstrProfValueData>
9595
ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
96-
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) {
96+
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates,
97+
unsigned MaxNumValueData) {
98+
// Use the max of the values specified by -icp-max-prom and the provided
99+
// MaxNumValueData parameter.
100+
if (MaxNumPromotions > MaxNumValueData)
101+
MaxNumValueData = MaxNumPromotions;
97102
ValueDataArray = getValueProfDataFromInst(*I, IPVK_IndirectCallTarget,
98-
MaxNumPromotions, TotalCount);
103+
MaxNumValueData, TotalCount);
99104
if (ValueDataArray.empty()) {
100105
NumCandidates = 0;
101106
return MutableArrayRef<InstrProfValueData>();

llvm/lib/Analysis/ModuleSummaryAnalysis.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,19 @@ static cl::opt<bool> EnableMemProfIndirectCallSupport(
8585
cl::desc(
8686
"Enable MemProf support for summarizing and cloning indirect calls"));
8787

88+
// This can be used to override the number of callees created from VP metadata
89+
// normally taken from the -icp-max-prom option with a larger amount, if useful
90+
// for analysis. Use a separate option so that we can control the number of
91+
// indirect callees for ThinLTO summary based analysis (e.g. for MemProf which
92+
// needs this information for a correct and not overly-conservative callsite
93+
// graph analysis, especially because allocation contexts may not be very
94+
// frequent), without affecting normal ICP.
95+
cl::opt<unsigned>
96+
MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0),
97+
cl::Hidden,
98+
cl::desc("Max number of summary edges added from "
99+
"indirect call profile metadata"));
100+
88101
LLVM_ABI extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
89102

90103
extern cl::opt<unsigned> MaxNumVTableAnnotations;
@@ -494,8 +507,8 @@ static void computeFunctionSummary(
494507
}
495508

496509
CandidateProfileData =
497-
ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount,
498-
NumCandidates);
510+
ICallAnalysis.getPromotionCandidatesForInstruction(
511+
&I, TotalCount, NumCandidates, MaxSummaryIndirectEdges);
499512
for (const auto &Candidate : CandidateProfileData)
500513
CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
501514
.updateHotness(getHotness(Candidate.Count, PSI));

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,8 @@ cl::opt<bool> MemProfFixupImportant(
235235
"memprof-fixup-important", cl::init(true), cl::Hidden,
236236
cl::desc("Enables edge fixup for important contexts"));
237237

238+
extern cl::opt<unsigned> MaxSummaryIndirectEdges;
239+
238240
} // namespace llvm
239241

240242
namespace {
@@ -6067,8 +6069,8 @@ unsigned MemProfContextDisambiguation::recordICPInfo(
60676069
uint32_t NumCandidates;
60686070
uint64_t TotalCount;
60696071
auto CandidateProfileData =
6070-
ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
6071-
NumCandidates);
6072+
ICallAnalysis->getPromotionCandidatesForInstruction(
6073+
CB, TotalCount, NumCandidates, MaxSummaryIndirectEdges);
60726074
if (CandidateProfileData.empty())
60736075
return 0;
60746076

Lines changed: 57 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,57 @@
1-
; Promote at most one function and annotate at most one vtable.
2-
; As a result, only one value (of each relevant kind) shows up in the function
3-
; summary.
1+
;; Check that the values of -icp-max-num-vtables, -icp-max-prom, and
2+
;; -module-summary-max-indirect-edges affect the number of profiled
3+
;; vtables and virtual functions propagated from the VP metadata to
4+
;; the ThinLTO summary as expected.
45

6+
;; First try with a max of 1 for both vtables and virtual functions.
57
; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 %s -o %t.o
68

7-
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
9+
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s --check-prefix=SUMMARY
810

911
; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS
10-
; Round trip it through llvm-as
12+
;; Round trip it through llvm-as
1113
; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS
1214

13-
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
14-
; CHECK-NEXT: <VERSION op0=
15-
; CHECK-NEXT: <FLAGS op0=0/>
16-
; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
17-
; that loads vtable pointers.
18-
; CHECK-NEXT: <VALUE_GUID {{.*}} op0=21 op1=456547254 op2=3929380924/>
19-
; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the
20-
; indirect call instruction.
21-
; CHECK-NEXT: <VALUE_GUID {{.*}} op0=20 op1=1271117309 op2=2009351347/>
22-
; NOTE vtables and functions from Derived class is dropped because
23-
; `-icp-max-num-vtables` and `-icp-max-prom` are both set to one.
24-
; <PERMODULE_PROFILE> has the format [valueid, flags, instcount, funcflags,
25-
; numrefs, rorefcnt, worefcnt,
26-
; m x valueid,
27-
; n x (valueid, hotness+tailcall)]
28-
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op0=0 op1=0 op2=4 op3=256 op4=1 op5=1 op6=0 op7=21 op8=20 op9=3/>
29-
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
15+
;; Next check that a larger -module-summary-max-indirect-edges value overrides
16+
;; -icp-max-prom when determining how many virtual functions to summarize.
17+
; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 -module-summary-max-indirect-edges=2 %s -o %t2.o
18+
; RUN: llvm-bcanalyzer -dump %t2.o | FileCheck %s --check-prefixes=SUMMARY,SUMMARY2
19+
; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefixes=DIS,DIS2
20+
21+
; SUMMARY: <GLOBALVAL_SUMMARY_BLOCK
22+
; SUMMARY-NEXT: <VERSION op0=
23+
; SUMMARY-NEXT: <FLAGS op0=0/>
24+
25+
;; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
26+
;; that loads vtable pointers.
27+
; SUMMARY-NEXT: <VALUE_GUID {{.*}} op0=[[VTABLEBASE:[0-9]+]] op1=456547254 op2=3929380924/>
28+
;; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the
29+
;; indirect call instruction.
30+
; SUMMARY-NEXT: <VALUE_GUID {{.*}} op0=[[VFUNCBASE:[0-9]+]] op1=1271117309 op2=2009351347/>
31+
;; The `VALUE_GUID` below represents the "_ZN7Derived4funcEv" referenced by the
32+
;; indirect call instruction.
33+
; SUMMARY2-NEXT: <VALUE_GUID {{.*}} op0=[[VFUNCDER:[0-9]+]] op1=1437699922 op2=4037658799/>
34+
35+
;; <PERMODULE_PROFILE> has the format [valueid, flags, instcount, funcflags,
36+
;; numrefs, rorefcnt, worefcnt,
37+
;; m x valueid,
38+
;; n x (valueid, hotness+tailcall)]
39+
;; NOTE vtables and functions from Derived class are dropped in the base case
40+
;; because `-icp-max-num-vtables` and `-icp-max-prom` are both set to one.
41+
; SUMMARY-NEXT: <PERMODULE_PROFILE {{.*}} op0=0 op1=0 op2=4 op3=256 op4=1 op5=1 op6=0 op7=[[VTABLEBASE]] op8=[[VFUNCBASE]] op9=3
42+
;; With -module-summary-max-indirect-edges=2 we do get the Derived class
43+
;; function in the summary.
44+
; SUMMARY2-SAME: op10=[[VFUNCDER]] op11=2
45+
;; We should have no other ops before the end of the summary record.
46+
; SUMMARY-NOT: op
47+
; SUMMARY-SAME: />
48+
; SUMMARY-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
3049

3150
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3251
target triple = "x86_64-unknown-linux-gnu"
3352

34-
; Function has one BB and an entry count of 150, so the BB is hot according to
35-
; ProfileSummary and reflected so in the bitcode (see llvm-dis output).
53+
;; Function has one BB and an entry count of 150, so the BB is hot according to
54+
;; ProfileSummary and reflected so in the bitcode (see llvm-dis output).
3655
define i32 @_Z4testP4Base(ptr %0) !prof !15 {
3756
%2 = load ptr, ptr %0, !prof !16
3857
%3 = load ptr, ptr %2
@@ -58,17 +77,20 @@ define i32 @_Z4testP4Base(ptr %0) !prof !15 {
5877
!14 = !{i32 999999, i64 1, i32 2}
5978

6079
!15 = !{!"function_entry_count", i32 150}
61-
; 1960855528937986108 is the MD5 hash of _ZTV4Base, and
62-
; 13870436605473471591 is the MD5 hash of _ZTV7Derived
80+
;; 1960855528937986108 is the MD5 hash of _ZTV4Base, and
81+
;; 13870436605473471591 is the MD5 hash of _ZTV7Derived
6382
!16 = !{!"VP", i32 2, i64 150, i64 1960855528937986108, i64 100, i64 13870436605473471591, i64 50}
64-
; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and
65-
; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv
83+
;; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and
84+
;; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv
6685
!17 = !{!"VP", i32 0, i64 150, i64 5459407273543877811, i64 100, i64 6174874150489409711, i64 50}
6786

68-
; ModuleSummaryIndex stores <guid, global-value summary> map in std::map; so
69-
; global value summares are printed out in the order that gv's guid increases.
70-
; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0))
71-
; DIS: ^1 = gv: (guid: 1960855528937986108)
72-
; DIS: ^2 = gv: (guid: 5459407273543877811)
73-
; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965
74-
; DIS: ^4 = blockcount: 0
87+
;; ModuleSummaryIndex stores <guid, global-value summary> map in std::map; so
88+
;; global value summaries are printed out in the order that gv's guid increases.
89+
; DIS: ^[[VTABLEBASE2:[0-9]+]] = gv: (guid: 1960855528937986108)
90+
; DIS: ^[[VFUNCBASE2:[0-9]+]] = gv: (guid: 5459407273543877811)
91+
; DIS2: ^[[VFUNCDER2:[0-9]+]] = gv: (guid: 6174874150489409711)
92+
; DIS: gv: (name: "_Z4testP4Base", {{.*}} calls: ((callee: ^[[VFUNCBASE2]], hotness: hot)
93+
;; With -module-summary-max-indirect-edges=2 we get the Derived func.
94+
; DIS2-SAME: (callee: ^[[VFUNCDER2]], hotness: none)
95+
; DIS-NOT: callee
96+
; DIS-SAME: ), refs: (readonly ^[[VTABLEBASE2]])

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@
7070
; RUN: split-file %s %t
7171

7272
; RUN: opt -thinlto-bc %t/main.ll >%t/main.o
73-
; RUN: opt -thinlto-bc %t/foo.ll >%t/foo.o
73+
;; Check that -module-summary-max-indirect-edges correctly overrides
74+
;; -icp-max-prom with a higher max when building summary.
75+
; RUN: opt -thinlto-bc -icp-max-prom=1 -module-summary-max-indirect-edges=2 %t/foo.ll >%t/foo.o
7476

7577
;; Check that we get the synthesized callsite records. There should be 2, one
7678
;; for each profiled target in the VP metadata. They will have the same stackIds
@@ -86,6 +88,10 @@
8688
;; First perform in-process ThinLTO
8789
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
8890
; RUN: -enable-memprof-indirect-call-support=true \
91+
;; Check that -module-summary-max-indirect-edges correctly overrides
92+
;; -icp-max-prom with a higher max when performing memprof ICP.
93+
; RUN: -icp-max-prom=1 \
94+
; RUN: -module-summary-max-indirect-edges=2 \
8995
; RUN: -supports-hot-cold-new \
9096
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
9197
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \

0 commit comments

Comments
 (0)