Skip to content

Commit aaa685c

Browse files
author
git apple-llvm automerger
committed
Merge commit 'ac39d26dc4a8' from llvm.org/main into next
2 parents 4e43b39 + ac39d26 commit aaa685c

File tree

2 files changed

+84
-1
lines changed

2 files changed

+84
-1
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ STATISTIC(NewMergedNodes, "Number of new nodes created during merging");
9595
STATISTIC(NonNewMergedNodes, "Number of non new nodes used during merging");
9696
STATISTIC(MissingAllocForContextId,
9797
"Number of missing alloc nodes for context ids");
98+
STATISTIC(SkippedCallsCloning,
99+
"Number of calls skipped during cloning due to unexpected operand");
98100

99101
static cl::opt<std::string> DotFilePathPrefix(
100102
"memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -5161,6 +5163,19 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
51615163

51625164
assert(!isMemProfClone(*CalledFunction));
51635165

5166+
// Because we update the cloned calls by calling setCalledOperand (see
5167+
// comment below), out of an abundance of caution make sure the called
5168+
// function was actually the called operand (or its aliasee). We also
5169+
// strip pointer casts when looking for calls (to match behavior during
5170+
// summary generation); however, with opaque pointers in theory this
5171+
// should not be an issue. Note we still clone the current function
5172+
// (containing this call) above, as that could be needed for its callers.
5173+
auto *GA = dyn_cast_or_null<GlobalAlias>(CB->getCalledOperand());
5174+
if (CalledFunction != CB->getCalledOperand() &&
5175+
(!GA || CalledFunction != GA->getAliaseeObject())) {
5176+
SkippedCallsCloning++;
5177+
return;
5178+
}
51645179
// Update the calls per the summary info.
51655180
// Save orig name since it gets updated in the first iteration
51665181
// below.
@@ -5179,7 +5194,13 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
51795194
CBClone = CB;
51805195
else
51815196
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
5182-
CBClone->setCalledFunction(NewF);
5197+
// Set the called operand directly instead of calling setCalledFunction,
5198+
// as the latter mutates the function type on the call. In rare cases
5199+
// we may have a slightly different type on a callee function
5200+
// declaration due to it being imported from a different module with
5201+
// incomplete types. We really just want to change the name of the
5202+
// function to the clone, and not make any type changes.
5203+
CBClone->setCalledOperand(NewF.getCallee());
51835204
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
51845205
<< ore::NV("Call", CBClone) << " in clone "
51855206
<< ore::NV("Caller", CBClone->getFunction())
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
;; Test to ensure the callsite when updated to call a clone does not mutate the
2+
;; callee function type. In rare cases we may end up with a callee declaration
3+
;; that does not match the call type, because it was imported from a different
4+
;; module with an incomplete return type (in which case clang gives it a void
5+
;; return type).
6+
7+
; RUN: rm -rf %t && split-file %s %t && cd %t
8+
; RUN: llvm-as src.ll -o src.o
9+
; RUN: llvm-as src.o.thinlto.ll -o src.o.thinlto.bc
10+
; RUN: opt -passes=memprof-context-disambiguation src.o -S -memprof-import-summary=src.o.thinlto.bc | FileCheck %s
11+
12+
;--- src.ll
13+
; ModuleID = 'src.o'
14+
source_filename = "src.c"
15+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
16+
target triple = "x86_64-unknown-linux-gnu"
17+
18+
define i32 @main(ptr %b) {
19+
entry:
20+
;; This call is not changed as the summary specifies clone 0.
21+
; CHECK: call ptr @_Z3foov()
22+
%call = call ptr @_Z3foov(), !callsite !5
23+
;; After changing this call to call a clone, the function type should still
24+
;; be ptr, despite the void on the callee declaration.
25+
; CHECK: call ptr @_Z3foov.memprof.1()
26+
%call1 = call ptr @_Z3foov(), !callsite !6
27+
%0 = load ptr, ptr %b, align 8
28+
;; Although the summary indicates this should call clone 1, and the VP
29+
;; metadata indicates the callee is _Z3foov, it is not updated because
30+
;; the ICP facility requires the function types to match.
31+
; CHECK: call ptr %0()
32+
%call2 = call ptr %0(), !prof !7, !callsite !8
33+
ret i32 0
34+
}
35+
36+
;; Both the original callee function declaration and its clone have void return
37+
;; type.
38+
; CHECK: declare void @_Z3foov()
39+
; CHECK: declare void @_Z3foov.memprof.1()
40+
declare void @_Z3foov()
41+
42+
!llvm.dbg.cu = !{!0}
43+
!llvm.module.flags = !{!2, !3, !4}
44+
45+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 21.0.0git (git@github.com:llvm/llvm-project.git e391301e0e4d9183fe06e69602e87b0bc889aeda)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
46+
!1 = !DIFile(filename: "src.cc", directory: "", checksumkind: CSK_MD5, checksum: "8636c46e81402013b9d54e8307d2f149")
47+
!2 = !{i32 7, !"Dwarf Version", i32 5}
48+
!3 = !{i32 2, !"Debug Info Version", i32 3}
49+
!4 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
50+
!5 = !{i64 8632435727821051414}
51+
!6 = !{i64 -3421689549917153178}
52+
!7 = !{!"VP", i32 0, i64 4, i64 9191153033785521275, i64 4}
53+
!8 = !{i64 1234}
54+
55+
;--- src.o.thinlto.ll
56+
; ModuleID = 'src.o.thinlto.bc'
57+
source_filename = "src.o.thinlto.bc"
58+
59+
^0 = module: (path: "src.o", hash: (2823430083, 3994560862, 899296057, 1055405378, 2961356784))
60+
^1 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414)), (callee: null, clones: (1), stackIds: (15025054523792398438)), (callee: null, clones: (1), stackIds: (1234))))))
61+
^2 = flags: 353
62+
^3 = blockcount: 0

0 commit comments

Comments
 (0)