Skip to content

Commit 0c17737

Browse files
authored
JIT: improve profile update for loop inversion (#85265)
If the loop test block has multiple predecessors we will not do proper profile updates. This can lead to downstream problems with block layout (say leaving a cold block in a loop). Fix by changing how we compute the amount of profile that should remain in the test block. Fixes #84319.
1 parent f077de0 commit 0c17737

File tree

1 file changed

+72
-37
lines changed

1 file changed

+72
-37
lines changed

src/coreclr/jit/optimizer.cpp

Lines changed: 72 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4875,22 +4875,28 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
48754875
}
48764876

48774877
// Get hold of the jump target
4878-
BasicBlock* bTest = block->bbJumpDest;
4878+
BasicBlock* const bTest = block->bbJumpDest;
48794879

4880-
// Does the block consist of 'jtrue(cond) block' ?
4880+
// Does the bTest consist of 'jtrue(cond) block' ?
48814881
if (bTest->bbJumpKind != BBJ_COND)
48824882
{
48834883
return false;
48844884
}
48854885

48864886
// bTest must be a backwards jump to block->bbNext
4887-
if (bTest->bbJumpDest != block->bbNext)
4887+
// This will be the top of the loop.
4888+
//
4889+
BasicBlock* const bTop = bTest->bbJumpDest;
4890+
4891+
if (bTop != block->bbNext)
48884892
{
48894893
return false;
48904894
}
48914895

4892-
// Since test is a BBJ_COND it will have a bbNext
4893-
noway_assert(bTest->bbNext != nullptr);
4896+
// Since bTest is a BBJ_COND it will have a bbNext
4897+
//
4898+
BasicBlock* const bJoin = bTest->bbNext;
4899+
noway_assert(bJoin != nullptr);
48944900

48954901
// 'block' must be in the same try region as the condition, since we're going to insert a duplicated condition
48964902
// in a new block after 'block', and the condition might include exception throwing code.
@@ -4903,8 +4909,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
49034909

49044910
// The duplicated condition block will branch to bTest->bbNext, so that also better be in the
49054911
// same try region (or no try region) to avoid generating illegal flow.
4906-
BasicBlock* bTestNext = bTest->bbNext;
4907-
if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
4912+
if (bJoin->hasTryIndex() && !BasicBlock::sameTryRegion(block, bJoin))
49084913
{
49094914
return false;
49104915
}
@@ -4919,7 +4924,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
49194924
}
49204925

49214926
// Find the loop termination test at the bottom of the loop.
4922-
Statement* condStmt = bTest->lastStmt();
4927+
Statement* const condStmt = bTest->lastStmt();
49234928

49244929
// Verify the test block ends with a conditional that we can manipulate.
49254930
GenTree* const condTree = condStmt->GetRootNode();
@@ -4929,6 +4934,9 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
49294934
return false;
49304935
}
49314936

4937+
JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n",
4938+
block->bbNum, bTop->bbNum, bTest->bbNum);
4939+
49324940
// Estimate the cost of cloning the entire test block.
49334941
//
49344942
// Note: it would help throughput to compute the maximum cost
@@ -4956,43 +4964,53 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
49564964
bool allProfileWeightsAreValid = false;
49574965
weight_t const weightBlock = block->bbWeight;
49584966
weight_t const weightTest = bTest->bbWeight;
4959-
weight_t const weightNext = block->bbNext->bbWeight;
4967+
weight_t const weightTop = bTop->bbWeight;
49604968

49614969
// If we have profile data then we calculate the number of times
49624970
// the loop will iterate into loopIterations
49634971
if (fgIsUsingProfileWeights())
49644972
{
49654973
// Only rely upon the profile weight when all three of these blocks
49664974
// have good profile weights
4967-
if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
4975+
if (block->hasProfileWeight() && bTest->hasProfileWeight() && bTop->hasProfileWeight())
49684976
{
49694977
// If this while loop never iterates then don't bother transforming
49704978
//
4971-
if (weightNext == BB_ZERO_WEIGHT)
4979+
if (weightTop == BB_ZERO_WEIGHT)
49724980
{
49734981
return true;
49744982
}
49754983

4976-
// We generally expect weightTest == weightNext + weightBlock.
4984+
// We generally expect weightTest > weightTop
49774985
//
49784986
// Tolerate small inconsistencies...
49794987
//
4980-
if (!fgProfileWeightsConsistent(weightBlock + weightNext, weightTest))
4988+
if (!fgProfileWeightsConsistent(weightBlock + weightTop, weightTest))
49814989
{
49824990
JITDUMP("Profile weights locally inconsistent: block " FMT_WT ", next " FMT_WT ", test " FMT_WT "\n",
4983-
weightBlock, weightNext, weightTest);
4991+
weightBlock, weightTop, weightTest);
49844992
}
49854993
else
49864994
{
49874995
allProfileWeightsAreValid = true;
49884996

4989-
// Determine iteration count
4997+
// Determine average iteration count
49904998
//
4991-
// weightNext is the number of time this loop iterates
4992-
// weightBlock is the number of times that we enter the while loop
4999+
// weightTop is the number of time this loop executes
5000+
// weightTest is the number of times that we consider entering or remaining in the loop
49935001
// loopIterations is the average number of times that this loop iterates
49945002
//
4995-
loopIterations = weightNext / weightBlock;
5003+
weight_t loopEntries = weightTest - weightTop;
5004+
5005+
// If profile is inaccurate, try and use other data to provide a credible estimate.
5006+
// The value should at least be >= weightBlock.
5007+
//
5008+
if (loopEntries < weightBlock)
5009+
{
5010+
loopEntries = weightBlock;
5011+
}
5012+
5013+
loopIterations = weightTop / loopEntries;
49965014
}
49975015
}
49985016
else
@@ -5132,16 +5150,33 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
51325150
// Flag the block that received the copy as potentially having various constructs.
51335151
bNewCond->bbFlags |= bTest->bbFlags & BBF_COPY_PROPAGATE;
51345152

5135-
bNewCond->bbJumpDest = bTest->bbNext;
5153+
// Fix flow and profile
5154+
//
5155+
bNewCond->bbJumpDest = bJoin;
51365156
bNewCond->inheritWeight(block);
51375157

5138-
// Update bbRefs and bbPreds for 'bNewCond', 'bNewCond->bbNext' 'bTest' and 'bTest->bbNext'.
5158+
if (allProfileWeightsAreValid)
5159+
{
5160+
weight_t const delta = weightTest - weightTop;
51395161

5140-
fgAddRefPred(bNewCond, block);
5141-
fgAddRefPred(bNewCond->bbNext, bNewCond);
5162+
// If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight.
5163+
// But this might not be the case if profile data is inconsistent.
5164+
//
5165+
// And if bTest has multiple outside edges we want to account for the weight of them all.
5166+
//
5167+
if (delta > block->bbWeight)
5168+
{
5169+
bNewCond->setBBProfileWeight(delta);
5170+
}
5171+
}
51425172

5173+
// Update pred info
5174+
//
5175+
fgAddRefPred(bJoin, bNewCond);
5176+
fgAddRefPred(bTop, bNewCond);
5177+
5178+
fgAddRefPred(bNewCond, block);
51435179
fgRemoveRefPred(bTest, block);
5144-
fgAddRefPred(bTest->bbNext, bNewCond);
51455180

51465181
// Move all predecessor edges that look like loop entry edges to point to the new cloned condition
51475182
// block, not the existing condition block. The idea is that if we only move `block` to point to
@@ -5151,15 +5186,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
51515186
// as the proxy for predecessors that are "in" versus "out" of the potential loop. Note that correctness
51525187
// is maintained no matter which condition block we point to, but we'll lose optimization potential
51535188
// (and create spaghetti code) if we get it wrong.
5154-
5189+
//
51555190
BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt));
51565191
bool blockMapInitialized = false;
51575192

5158-
unsigned loopFirstNum = bNewCond->bbNext->bbNum;
5159-
unsigned loopBottomNum = bTest->bbNum;
5193+
unsigned const loopFirstNum = bTop->bbNum;
5194+
unsigned const loopBottomNum = bTest->bbNum;
51605195
for (BasicBlock* const predBlock : bTest->PredBlocks())
51615196
{
5162-
unsigned bNum = predBlock->bbNum;
5197+
unsigned const bNum = predBlock->bbNum;
51635198
if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum))
51645199
{
51655200
// Looks like the predecessor is from within the potential loop; skip it.
@@ -5189,8 +5224,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
51895224
// cases of stress modes with inconsistent weights.
51905225
//
51915226
JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", bTest->bbNum, weightTest,
5192-
weightNext);
5193-
bTest->inheritWeight(block->bbNext);
5227+
weightTop);
5228+
bTest->inheritWeight(bTop);
51945229

51955230
// Determine the new edge weights.
51965231
//
@@ -5200,23 +5235,23 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
52005235
// Note "next" is the loop top block, not bTest's bbNext,
52015236
// we'll call this latter block "after".
52025237
//
5203-
weight_t const testToNextLikelihood = min(1.0, weightNext / weightTest);
5238+
weight_t const testToNextLikelihood = min(1.0, weightTop / weightTest);
52045239
weight_t const testToAfterLikelihood = 1.0 - testToNextLikelihood;
52055240

5206-
// Adjust edges out of bTest (which now has weight weightNext)
5241+
// Adjust edges out of bTest (which now has weight weightTop)
52075242
//
5208-
weight_t const testToNextWeight = weightNext * testToNextLikelihood;
5209-
weight_t const testToAfterWeight = weightNext * testToAfterLikelihood;
5243+
weight_t const testToNextWeight = weightTop * testToNextLikelihood;
5244+
weight_t const testToAfterWeight = weightTop * testToAfterLikelihood;
52105245

5211-
FlowEdge* const edgeTestToNext = fgGetPredForBlock(bTest->bbJumpDest, bTest);
5246+
FlowEdge* const edgeTestToNext = fgGetPredForBlock(bTop, bTest);
52125247
FlowEdge* const edgeTestToAfter = fgGetPredForBlock(bTest->bbNext, bTest);
52135248

5214-
JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n", bTest->bbNum,
5215-
bTest->bbJumpDest->bbNum, testToNextWeight);
5249+
JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n", bTest->bbNum, bTop->bbNum,
5250+
testToNextWeight);
52165251
JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (exit loop)\n", bTest->bbNum,
52175252
bTest->bbNext->bbNum, testToAfterWeight);
52185253

5219-
edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTest->bbJumpDest);
5254+
edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTop);
52205255
edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight, bTest->bbNext);
52215256

52225257
// Adjust edges out of block, using the same distribution.

0 commit comments

Comments
 (0)