Skip to content

JIT: improve profile update for loop inversion #85265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 25, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 72 additions & 37 deletions src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4875,22 +4875,28 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
}

// Get hold of the jump target
BasicBlock* bTest = block->bbJumpDest;
BasicBlock* const bTest = block->bbJumpDest;

// Does the block consist of 'jtrue(cond) block' ?
// Does the bTest consist of 'jtrue(cond) block' ?
if (bTest->bbJumpKind != BBJ_COND)
{
return false;
}

// bTest must be a backwards jump to block->bbNext
if (bTest->bbJumpDest != block->bbNext)
// This will be the top of the loop.
//
BasicBlock* const bTop = bTest->bbJumpDest;

if (bTop != block->bbNext)
{
return false;
}

// Since test is a BBJ_COND it will have a bbNext
noway_assert(bTest->bbNext != nullptr);
// Since bTest is a BBJ_COND it will have a bbNext
//
BasicBlock* const bJoin = bTest->bbNext;
noway_assert(bJoin != nullptr);

// 'block' must be in the same try region as the condition, since we're going to insert a duplicated condition
// in a new block after 'block', and the condition might include exception throwing code.
Expand All @@ -4903,8 +4909,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)

// The duplicated condition block will branch to bTest->bbNext, so that also better be in the
// same try region (or no try region) to avoid generating illegal flow.
BasicBlock* bTestNext = bTest->bbNext;
if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
if (bJoin->hasTryIndex() && !BasicBlock::sameTryRegion(block, bJoin))
{
return false;
}
Expand All @@ -4919,7 +4924,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
}

// Find the loop termination test at the bottom of the loop.
Statement* condStmt = bTest->lastStmt();
Statement* const condStmt = bTest->lastStmt();

// Verify the test block ends with a conditional that we can manipulate.
GenTree* const condTree = condStmt->GetRootNode();
Expand All @@ -4929,6 +4934,9 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
return false;
}

JITDUMP("Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB "\n",
block->bbNum, bTop->bbNum, bTest->bbNum);

// Estimate the cost of cloning the entire test block.
//
// Note: it would help throughput to compute the maximum cost
Expand Down Expand Up @@ -4956,43 +4964,53 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
bool allProfileWeightsAreValid = false;
weight_t const weightBlock = block->bbWeight;
weight_t const weightTest = bTest->bbWeight;
weight_t const weightNext = block->bbNext->bbWeight;
weight_t const weightTop = bTop->bbWeight;

// If we have profile data then we calculate the number of times
// the loop will iterate into loopIterations
if (fgIsUsingProfileWeights())
{
// Only rely upon the profile weight when all three of these blocks
// have good profile weights
if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
if (block->hasProfileWeight() && bTest->hasProfileWeight() && bTop->hasProfileWeight())
{
// If this while loop never iterates then don't bother transforming
//
if (weightNext == BB_ZERO_WEIGHT)
if (weightTop == BB_ZERO_WEIGHT)
{
return true;
}

// We generally expect weightTest == weightNext + weightBlock.
// We generally expect weightTest > weightTop
//
// Tolerate small inconsistencies...
//
if (!fgProfileWeightsConsistent(weightBlock + weightNext, weightTest))
if (!fgProfileWeightsConsistent(weightBlock + weightTop, weightTest))
{
JITDUMP("Profile weights locally inconsistent: block " FMT_WT ", next " FMT_WT ", test " FMT_WT "\n",
weightBlock, weightNext, weightTest);
weightBlock, weightTop, weightTest);
}
else
{
allProfileWeightsAreValid = true;

// Determine iteration count
// Determine average iteration count
//
// weightNext is the number of time this loop iterates
// weightBlock is the number of times that we enter the while loop
// weightTop is the number of time this loop executes
// weightTest is the number of times that we consider entering or remaining in the loop
// loopIterations is the average number of times that this loop iterates
//
loopIterations = weightNext / weightBlock;
weight_t loopEntries = weightTest - weightTop;

// If profile is inaccurate, try and use other data to provide a credible estimate.
// The value should at least be >= weightBlock.
//
if (loopEntries < weightBlock)
{
loopEntries = weightBlock;
}

loopIterations = weightTop / loopEntries;
}
}
else
Expand Down Expand Up @@ -5132,16 +5150,33 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
// Flag the block that received the copy as potentially having various constructs.
bNewCond->bbFlags |= bTest->bbFlags & BBF_COPY_PROPAGATE;

bNewCond->bbJumpDest = bTest->bbNext;
// Fix flow and profile
//
bNewCond->bbJumpDest = bJoin;
bNewCond->inheritWeight(block);

// Update bbRefs and bbPreds for 'bNewCond', 'bNewCond->bbNext' 'bTest' and 'bTest->bbNext'.
if (allProfileWeightsAreValid)
{
weight_t const delta = weightTest - weightTop;

fgAddRefPred(bNewCond, block);
fgAddRefPred(bNewCond->bbNext, bNewCond);
// If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight.
// But this might not be the case if profile data is inconsistent.
//
// And if bTest has multiple outside edges we want to account for the weight of them all.
//
if (delta > block->bbWeight)
{
bNewCond->setBBProfileWeight(delta);
}
}

// Update pred info
//
fgAddRefPred(bJoin, bNewCond);
fgAddRefPred(bTop, bNewCond);

fgAddRefPred(bNewCond, block);
fgRemoveRefPred(bTest, block);
fgAddRefPred(bTest->bbNext, bNewCond);

// Move all predecessor edges that look like loop entry edges to point to the new cloned condition
// block, not the existing condition block. The idea is that if we only move `block` to point to
Expand All @@ -5151,15 +5186,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
// as the proxy for predecessors that are "in" versus "out" of the potential loop. Note that correctness
// is maintained no matter which condition block we point to, but we'll lose optimization potential
// (and create spaghetti code) if we get it wrong.

//
BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt));
bool blockMapInitialized = false;

unsigned loopFirstNum = bNewCond->bbNext->bbNum;
unsigned loopBottomNum = bTest->bbNum;
unsigned const loopFirstNum = bTop->bbNum;
unsigned const loopBottomNum = bTest->bbNum;
for (BasicBlock* const predBlock : bTest->PredBlocks())
{
unsigned bNum = predBlock->bbNum;
unsigned const bNum = predBlock->bbNum;
if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum))
{
// Looks like the predecessor is from within the potential loop; skip it.
Expand Down Expand Up @@ -5189,8 +5224,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
// cases of stress modes with inconsistent weights.
//
JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", bTest->bbNum, weightTest,
weightNext);
bTest->inheritWeight(block->bbNext);
weightTop);
bTest->inheritWeight(bTop);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the actual fix -- the rest is just renaming and protecting against inconsistent profile data.


// Determine the new edge weights.
//
Expand All @@ -5200,23 +5235,23 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
// Note "next" is the loop top block, not bTest's bbNext,
// we'll call this latter block "after".
//
weight_t const testToNextLikelihood = min(1.0, weightNext / weightTest);
weight_t const testToNextLikelihood = min(1.0, weightTop / weightTest);
weight_t const testToAfterLikelihood = 1.0 - testToNextLikelihood;

// Adjust edges out of bTest (which now has weight weightNext)
// Adjust edges out of bTest (which now has weight weightTop)
//
weight_t const testToNextWeight = weightNext * testToNextLikelihood;
weight_t const testToAfterWeight = weightNext * testToAfterLikelihood;
weight_t const testToNextWeight = weightTop * testToNextLikelihood;
weight_t const testToAfterWeight = weightTop * testToAfterLikelihood;

FlowEdge* const edgeTestToNext = fgGetPredForBlock(bTest->bbJumpDest, bTest);
FlowEdge* const edgeTestToNext = fgGetPredForBlock(bTop, bTest);
FlowEdge* const edgeTestToAfter = fgGetPredForBlock(bTest->bbNext, bTest);

JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n", bTest->bbNum,
bTest->bbJumpDest->bbNum, testToNextWeight);
JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n", bTest->bbNum, bTop->bbNum,
testToNextWeight);
JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (exit loop)\n", bTest->bbNum,
bTest->bbNext->bbNum, testToAfterWeight);

edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTest->bbJumpDest);
edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight, bTop);
edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight, bTest->bbNext);

// Adjust edges out of block, using the same distribution.
Expand Down