Skip to content

Commit f82fb70

Browse files
JIT: profile checking through inlining (#101834)
Advance profile consistency check through inlining. Turns out there are five reasons why inlining may make profile data inconsistent. Account for these and add metrics. Also add separate metrics for consistency before and after inlining, since pre-inline phases are run on inlinees and so don't give us good insight into overall consistency rates. And add some metrics for inlining itself. Contributes to #93020. Co-authored-by: Aman Khalid <[email protected]>
1 parent e1f98a1 commit f82fb70

9 files changed

+252
-44
lines changed

src/coreclr/jit/compiler.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4697,11 +4697,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
46974697
return;
46984698
}
46994699

4700-
// Drop back to just checking profile likelihoods.
4701-
//
4702-
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
4703-
activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
4704-
47054700
// At this point in the phase list, all the inlinee phases have
47064701
// been run, and inlinee compiles have exited, so we should only
47074702
// get this far if we are jitting the root method.
@@ -4718,6 +4713,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
47184713
// Record "start" values for post-inlining cycles and elapsed time.
47194714
RecordStateAtEndOfInlining();
47204715

4716+
// Drop back to just checking profile likelihoods.
4717+
//
4718+
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
4719+
activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
4720+
47214721
// Transform each GT_ALLOCOBJ node into either an allocation helper call or
47224722
// local variable allocation on the stack.
47234723
ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS

src/coreclr/jit/compiler.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5284,6 +5284,7 @@ class Compiler
52845284

52855285
// The number of separate return points in the method.
52865286
unsigned fgReturnCount;
5287+
unsigned fgThrowCount;
52875288

52885289
PhaseStatus fgAddInternal();
52895290

@@ -6228,7 +6229,7 @@ class Compiler
62286229

62296230
void fgLinkBasicBlocks();
62306231

6231-
unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);
6232+
void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);
62326233

62336234
void fgCheckBasicBlockControlFlow();
62346235

src/coreclr/jit/fgbasic.cpp

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ void Compiler::fgInit()
5050
fgDomBBcount = 0;
5151
fgBBVarSetsInited = false;
5252
fgReturnCount = 0;
53+
fgThrowCount = 0;
5354

5455
m_dfsTree = nullptr;
5556
m_loops = nullptr;
@@ -233,8 +234,9 @@ bool Compiler::fgEnsureFirstBBisScratch()
233234
{
234235
// If the result is clearly nonsensical, just inherit
235236
//
236-
JITDUMP("\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsisent.\n",
237-
fgPgoConsistent ? "is now" : "was already");
237+
JITDUMP(
238+
"\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsistent.\n",
239+
fgPgoConsistent ? "is now" : "was already");
238240

239241
if (fgPgoConsistent)
240242
{
@@ -3099,19 +3101,18 @@ void Compiler::fgLinkBasicBlocks()
30993101
// codeSize -- length of the IL stream
31003102
// jumpTarget -- [in] bit vector of jump targets found by fgFindJumpTargets
31013103
//
3102-
// Returns:
3103-
// number of return blocks (BBJ_RETURN) in the method (may be zero)
3104-
//
31053104
// Notes:
3106-
// Invoked for prejited and jitted methods, and for all inlinees
3107-
3108-
unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
3105+
// Invoked for prejitted and jitted methods, and for all inlinees.
3106+
// Sets fgReturnCount and fgThrowCount
3107+
//
3108+
void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
31093109
{
3110-
unsigned retBlocks = 0;
3111-
const BYTE* codeBegp = codeAddr;
3112-
const BYTE* codeEndp = codeAddr + codeSize;
3113-
bool tailCall = false;
3114-
unsigned curBBoffs = 0;
3110+
unsigned retBlocks = 0;
3111+
unsigned throwBlocks = 0;
3112+
const BYTE* codeBegp = codeAddr;
3113+
const BYTE* codeEndp = codeAddr + codeSize;
3114+
bool tailCall = false;
3115+
unsigned curBBoffs = 0;
31153116
BasicBlock* curBBdesc;
31163117

31173118
// Keep track of where we are in the scope lists, as we will also
@@ -3312,7 +3313,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
33123313
// can be dispatched as tail calls from the caller.
33133314
compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX);
33143315
retBlocks++;
3315-
return retBlocks;
3316+
fgReturnCount = retBlocks;
3317+
return;
33163318
}
33173319

33183320
FALLTHROUGH;
@@ -3415,6 +3417,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
34153417

34163418
case CEE_THROW:
34173419
case CEE_RETHROW:
3420+
throwBlocks++;
34183421
jmpKind = BBJ_THROW;
34193422
break;
34203423

@@ -3597,7 +3600,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
35973600

35983601
fgLinkBasicBlocks();
35993602

3600-
return retBlocks;
3603+
fgReturnCount = retBlocks;
3604+
fgThrowCount = throwBlocks;
36013605
}
36023606

36033607
/*****************************************************************************
@@ -3709,7 +3713,7 @@ void Compiler::fgFindBasicBlocks()
37093713

37103714
/* Now create the basic blocks */
37113715

3712-
fgReturnCount = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
3716+
fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
37133717

37143718
if (compIsForInlining())
37153719
{
@@ -4269,7 +4273,7 @@ void Compiler::fgFixEntryFlowForOSR()
42694273
//
42704274
if ((fgEntryBB->bbPreds != nullptr) && (fgEntryBB != fgOSREntryBB))
42714275
{
4272-
JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsisent.\n",
4276+
JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsistent.\n",
42734277
fgPgoConsistent ? "is now" : "was already");
42744278
fgPgoConsistent = false;
42754279
}

src/coreclr/jit/fginline.cpp

Lines changed: 147 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -624,24 +624,86 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
624624
{
625625
JITDUMP(" ... found foldable jtrue at [%06u] in " FMT_BB "\n", m_compiler->dspTreeID(tree),
626626
block->bbNum);
627+
m_compiler->Metrics.InlinerBranchFold++;
627628

628629
// We have a constant operand, and should have the all clear to optimize.
629630
// Update side effects on the tree, assert there aren't any, and bash to nop.
630631
m_compiler->gtUpdateNodeSideEffects(tree);
631632
assert((tree->gtFlags & GTF_SIDE_EFFECT) == 0);
632633
tree->gtBashToNOP();
633-
m_madeChanges = true;
634+
m_madeChanges = true;
635+
FlowEdge* removedEdge = nullptr;
634636

635637
if (condTree->IsIntegralConst(0))
636638
{
637-
m_compiler->fgRemoveRefPred(block->GetTrueEdge());
639+
removedEdge = block->GetTrueEdge();
640+
m_compiler->fgRemoveRefPred(removedEdge);
638641
block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge());
639642
}
640643
else
641644
{
642-
m_compiler->fgRemoveRefPred(block->GetFalseEdge());
645+
removedEdge = block->GetFalseEdge();
646+
m_compiler->fgRemoveRefPred(removedEdge);
643647
block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
644648
}
649+
650+
// Update profile; make it consistent if possible
651+
//
652+
if (block->hasProfileWeight())
653+
{
654+
bool repairWasComplete = true;
655+
weight_t const weight = removedEdge->getLikelyWeight();
656+
657+
if (weight > 0)
658+
{
659+
// Target block weight will increase.
660+
//
661+
BasicBlock* const target = block->GetTarget();
662+
assert(target->hasProfileWeight());
663+
target->setBBProfileWeight(target->bbWeight + weight);
664+
665+
// Alternate weight will decrease
666+
//
667+
BasicBlock* const alternate = removedEdge->getDestinationBlock();
668+
assert(alternate->hasProfileWeight());
669+
weight_t const alternateNewWeight = alternate->bbWeight - weight;
670+
671+
// If profile weights are consistent, expect at worst a slight underflow.
672+
//
673+
if (m_compiler->fgPgoConsistent && (alternateNewWeight < 0))
674+
{
675+
assert(m_compiler->fgProfileWeightsEqual(alternateNewWeight, 0));
676+
}
677+
alternate->setBBProfileWeight(max(0.0, alternateNewWeight));
678+
679+
// This will affect profile transitively, so in general
680+
// the profile will become inconsistent.
681+
//
682+
repairWasComplete = false;
683+
684+
// But we can check for the special case where the
685+
// block's postdominator is target's target (simple
686+
// if/then/else/join).
687+
//
688+
if (target->KindIs(BBJ_ALWAYS))
689+
{
690+
repairWasComplete =
691+
alternate->KindIs(BBJ_ALWAYS) && alternate->TargetIs(target->GetTarget());
692+
}
693+
}
694+
695+
if (!repairWasComplete)
696+
{
697+
JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n",
698+
m_compiler->fgPgoConsistent ? "is now" : "was already");
699+
700+
if (m_compiler->fgPgoConsistent)
701+
{
702+
m_compiler->Metrics.ProfileInconsistentInlinerBranchFold++;
703+
m_compiler->fgPgoConsistent = false;
704+
}
705+
}
706+
}
645707
}
646708
}
647709
else
@@ -692,6 +754,11 @@ PhaseStatus Compiler::fgInline()
692754
JitConfig.JitPrintInlinedMethods().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args);
693755
#endif // DEBUG
694756

757+
if (fgPgoConsistent)
758+
{
759+
Metrics.ProfileConsistentBeforeInline++;
760+
}
761+
695762
noway_assert(fgFirstBB != nullptr);
696763

697764
BasicBlock* block = fgFirstBB;
@@ -822,6 +889,14 @@ PhaseStatus Compiler::fgInline()
822889
fgRenumberBlocks();
823890
}
824891

892+
if (fgPgoConsistent)
893+
{
894+
Metrics.ProfileConsistentAfterInline++;
895+
}
896+
897+
Metrics.InlineCount = m_inlineStrategy->GetInlineCount();
898+
Metrics.InlineAttempt = m_inlineStrategy->GetImportCount();
899+
825900
return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
826901
}
827902

@@ -1611,6 +1686,75 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
16111686
}
16121687
#endif
16131688

1689+
// Update profile consistency
1690+
//
1691+
// If inlinee is inconsistent, root method will be inconsistent too.
1692+
//
1693+
if (!InlineeCompiler->fgPgoConsistent)
1694+
{
1695+
if (fgPgoConsistent)
1696+
{
1697+
JITDUMP("INLINER: profile data in root now inconsistent -- inlinee had inconsistency\n");
1698+
Metrics.ProfileInconsistentInlinee++;
1699+
fgPgoConsistent = false;
1700+
}
1701+
}
1702+
1703+
// If we inline a no-return call at a site with profile weight,
1704+
// we will introduce inconsistency.
1705+
//
1706+
if (InlineeCompiler->fgReturnCount == 0)
1707+
{
1708+
JITDUMP("INLINER: no-return inlinee\n");
1709+
1710+
if (iciBlock->bbWeight > 0)
1711+
{
1712+
if (fgPgoConsistent)
1713+
{
1714+
JITDUMP("INLINER: profile data in root now inconsistent -- no-return inlinee at call site in " FMT_BB
1715+
" with weight " FMT_WT "\n",
1716+
iciBlock->bbNum, iciBlock->bbWeight);
1717+
Metrics.ProfileInconsistentNoReturnInlinee++;
1718+
fgPgoConsistent = false;
1719+
}
1720+
}
1721+
else
1722+
{
1723+
// Inlinee scaling should assure this is so.
1724+
//
1725+
assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
1726+
}
1727+
}
1728+
1729+
// If the call site is not in a try and the callee has a throw,
1730+
// we may introduce inconsistency.
1731+
//
1732+
// Technically we should check if the callee has a throw not in a try, but since
1733+
// we can't inline methods with EH yet we don't see those.
1734+
//
1735+
if (InlineeCompiler->fgThrowCount > 0)
1736+
{
1737+
JITDUMP("INLINER: may-throw inlinee\n");
1738+
1739+
if (iciBlock->bbWeight > 0)
1740+
{
1741+
if (fgPgoConsistent)
1742+
{
1743+
JITDUMP("INLINER: profile data in root now inconsistent -- may-throw inlinee at call site in " FMT_BB
1744+
" with weight " FMT_WT "\n",
1745+
iciBlock->bbNum, iciBlock->bbWeight);
1746+
Metrics.ProfileInconsistentMayThrowInlinee++;
1747+
fgPgoConsistent = false;
1748+
}
1749+
}
1750+
else
1751+
{
1752+
// Inlinee scaling should assure this is so.
1753+
//
1754+
assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
1755+
}
1756+
}
1757+
16141758
// If an inlinee needs GS cookie we need to make sure that the cookie will not be allocated at zero stack offset.
16151759
// Note that if the root method needs GS cookie then this has already been taken care of.
16161760
if (!getNeedsGSSecurityCookie() && InlineeCompiler->getNeedsGSSecurityCookie())

src/coreclr/jit/fgprofile.cpp

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,22 +141,39 @@ void Compiler::fgApplyProfileScale()
141141
JITDUMP(" ... no callee profile data, will use non-pgo weight to scale\n");
142142
}
143143

144-
// Ostensibly this should be fgCalledCount for the callee, but that's not available
145-
// as it requires some analysis.
144+
// Determine the weight of the first block preds, if any.
145+
// (only happens if the first block is a loop head).
146146
//
147-
// For most callees it will be the same as the entry block count.
148-
//
149-
// Note when/if we early do normalization this may need to change.
147+
weight_t firstBlockPredWeight = 0;
148+
for (FlowEdge* const firstBlockPred : fgFirstBB->PredEdges())
149+
{
150+
firstBlockPredWeight += firstBlockPred->getLikelyWeight();
151+
}
152+
153+
// Determine the "input" weight for the callee
150154
//
151155
weight_t calleeWeight = fgFirstBB->bbWeight;
152156

153-
// Callee entry weight is nonzero?
157+
// Callee entry weight is zero or negative (taking backedges into account)?
154158
// If so, just choose the smallest plausible weight.
155159
//
156-
if (calleeWeight == BB_ZERO_WEIGHT)
160+
if (calleeWeight <= firstBlockPredWeight)
157161
{
158162
calleeWeight = fgHaveProfileWeights() ? 1.0 : BB_UNITY_WEIGHT;
159-
JITDUMP(" ... callee entry has weight zero, will use weight of " FMT_WT " to scale\n", calleeWeight);
163+
JITDUMP(" ... callee entry has zero or negative weight, will use weight of " FMT_WT " to scale\n",
164+
calleeWeight);
165+
JITDUMP("Profile data could not be scaled consistently. Data %s inconsistent.\n",
166+
fgPgoConsistent ? "is now" : "was already");
167+
168+
if (fgPgoConsistent)
169+
{
170+
Metrics.ProfileInconsistentInlineeScale++;
171+
fgPgoConsistent = false;
172+
}
173+
}
174+
else
175+
{
176+
calleeWeight -= firstBlockPredWeight;
160177
}
161178

162179
// Call site has profile weight?
@@ -2829,6 +2846,14 @@ PhaseStatus Compiler::fgInstrumentMethod()
28292846
//
28302847
PhaseStatus Compiler::fgIncorporateProfileData()
28312848
{
2849+
// For now we only rely on profile data when optimizing.
2850+
//
2851+
if (!opts.OptimizationEnabled())
2852+
{
2853+
JITDUMP("not optimizing, so not incorporating any profile data\n");
2854+
return PhaseStatus::MODIFIED_NOTHING;
2855+
}
2856+
28322857
// Are we doing profile stress?
28332858
//
28342859
if (fgStressBBProf() > 0)

0 commit comments

Comments
 (0)