@@ -4875,22 +4875,28 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
4875
4875
}
4876
4876
4877
4877
// Get hold of the jump target
4878
- BasicBlock* bTest = block->bbJumpDest ;
4878
+ BasicBlock* const bTest = block->bbJumpDest ;
4879
4879
4880
- // Does the block consist of 'jtrue(cond) block' ?
4880
+ // Does bTest consist of 'jtrue(cond) block' ?
4881
4881
if (bTest->bbJumpKind != BBJ_COND)
4882
4882
{
4883
4883
return false ;
4884
4884
}
4885
4885
4886
4886
// bTest must be a backwards jump to block->bbNext
4887
- if (bTest->bbJumpDest != block->bbNext )
4887
+ // This will be the top of the loop.
4888
+ //
4889
+ BasicBlock* const bTop = bTest->bbJumpDest ;
4890
+
4891
+ if (bTop != block->bbNext )
4888
4892
{
4889
4893
return false ;
4890
4894
}
4891
4895
4892
- // Since test is a BBJ_COND it will have a bbNext
4893
- noway_assert (bTest->bbNext != nullptr );
4896
+ // Since bTest is a BBJ_COND it will have a bbNext
4897
+ //
4898
+ BasicBlock* const bJoin = bTest->bbNext ;
4899
+ noway_assert (bJoin != nullptr );
4894
4900
4895
4901
// 'block' must be in the same try region as the condition, since we're going to insert a duplicated condition
4896
4902
// in a new block after 'block', and the condition might include exception throwing code.
@@ -4903,8 +4909,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
4903
4909
4904
4910
// The duplicated condition block will branch to bTest->bbNext, so that also better be in the
4905
4911
// same try region (or no try region) to avoid generating illegal flow.
4906
- BasicBlock* bTestNext = bTest->bbNext ;
4907
- if (bTestNext->hasTryIndex () && !BasicBlock::sameTryRegion (block, bTestNext))
4912
+ if (bJoin->hasTryIndex () && !BasicBlock::sameTryRegion (block, bJoin))
4908
4913
{
4909
4914
return false ;
4910
4915
}
@@ -4919,7 +4924,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
4919
4924
}
4920
4925
4921
4926
// Find the loop termination test at the bottom of the loop.
4922
- Statement* condStmt = bTest->lastStmt ();
4927
+ Statement* const condStmt = bTest->lastStmt ();
4923
4928
4924
4929
// Verify the test block ends with a conditional that we can manipulate.
4925
4930
GenTree* const condTree = condStmt->GetRootNode ();
@@ -4929,6 +4934,9 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
4929
4934
return false ;
4930
4935
}
4931
4936
4937
+ JITDUMP (" Matched flow pattern for loop inversion: block " FMT_BB " bTop " FMT_BB " bTest " FMT_BB " \n " ,
4938
+ block->bbNum , bTop->bbNum , bTest->bbNum );
4939
+
4932
4940
// Estimate the cost of cloning the entire test block.
4933
4941
//
4934
4942
// Note: it would help throughput to compute the maximum cost
@@ -4956,43 +4964,53 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
4956
4964
bool allProfileWeightsAreValid = false ;
4957
4965
weight_t const weightBlock = block->bbWeight ;
4958
4966
weight_t const weightTest = bTest->bbWeight ;
4959
- weight_t const weightNext = block-> bbNext ->bbWeight ;
4967
+ weight_t const weightTop = bTop ->bbWeight ;
4960
4968
4961
4969
// If we have profile data then we calculate the number of times
4962
4970
// the loop will iterate into loopIterations
4963
4971
if (fgIsUsingProfileWeights ())
4964
4972
{
4965
4973
// Only rely upon the profile weight when all three of these blocks
4966
4974
// have good profile weights
4967
- if (block->hasProfileWeight () && bTest->hasProfileWeight () && block-> bbNext ->hasProfileWeight ())
4975
+ if (block->hasProfileWeight () && bTest->hasProfileWeight () && bTop ->hasProfileWeight ())
4968
4976
{
4969
4977
// If this while loop never iterates then don't bother transforming
4970
4978
//
4971
- if (weightNext == BB_ZERO_WEIGHT)
4979
+ if (weightTop == BB_ZERO_WEIGHT)
4972
4980
{
4973
4981
return true ;
4974
4982
}
4975
4983
4976
- // We generally expect weightTest == weightNext + weightBlock.
4984
+ // We generally expect weightTest > weightTop
4977
4985
//
4978
4986
// Tolerate small inconsistencies...
4979
4987
//
4980
- if (!fgProfileWeightsConsistent (weightBlock + weightNext , weightTest))
4988
+ if (!fgProfileWeightsConsistent (weightBlock + weightTop , weightTest))
4981
4989
{
4982
4990
JITDUMP (" Profile weights locally inconsistent: block " FMT_WT " , next " FMT_WT " , test " FMT_WT " \n " ,
4983
- weightBlock, weightNext , weightTest);
4991
+ weightBlock, weightTop , weightTest);
4984
4992
}
4985
4993
else
4986
4994
{
4987
4995
allProfileWeightsAreValid = true ;
4988
4996
4989
- // Determine iteration count
4997
+ // Determine average iteration count
4990
4998
//
4991
- // weightNext is the number of time this loop iterates
4992
- // weightBlock is the number of times that we enter the while loop
4999
+ // weightTop is the number of times this loop executes
5000
+ // weightTest is the number of times that we consider entering or remaining in the loop
4993
5001
// loopIterations is the average number of times that this loop iterates
4994
5002
//
4995
- loopIterations = weightNext / weightBlock;
5003
+ weight_t loopEntries = weightTest - weightTop;
5004
+
5005
+ // If profile is inaccurate, try to use other data to provide a credible estimate.
5006
+ // The value should at least be >= weightBlock.
5007
+ //
5008
+ if (loopEntries < weightBlock)
5009
+ {
5010
+ loopEntries = weightBlock;
5011
+ }
5012
+
5013
+ loopIterations = weightTop / loopEntries;
4996
5014
}
4997
5015
}
4998
5016
else
@@ -5132,16 +5150,33 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
5132
5150
// Flag the block that received the copy as potentially having various constructs.
5133
5151
bNewCond->bbFlags |= bTest->bbFlags & BBF_COPY_PROPAGATE;
5134
5152
5135
- bNewCond->bbJumpDest = bTest->bbNext ;
5153
+ // Fix flow and profile
5154
+ //
5155
+ bNewCond->bbJumpDest = bJoin;
5136
5156
bNewCond->inheritWeight (block);
5137
5157
5138
- // Update bbRefs and bbPreds for 'bNewCond', 'bNewCond->bbNext' 'bTest' and 'bTest->bbNext'.
5158
+ if (allProfileWeightsAreValid)
5159
+ {
5160
+ weight_t const delta = weightTest - weightTop;
5139
5161
5140
- fgAddRefPred (bNewCond, block);
5141
- fgAddRefPred (bNewCond->bbNext , bNewCond);
5162
+ // If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight.
5163
+ // But this might not be the case if profile data is inconsistent.
5164
+ //
5165
+ // And if bTest has multiple outside edges we want to account for the weight of them all.
5166
+ //
5167
+ if (delta > block->bbWeight )
5168
+ {
5169
+ bNewCond->setBBProfileWeight (delta);
5170
+ }
5171
+ }
5142
5172
5173
+ // Update pred info
5174
+ //
5175
+ fgAddRefPred (bJoin, bNewCond);
5176
+ fgAddRefPred (bTop, bNewCond);
5177
+
5178
+ fgAddRefPred (bNewCond, block);
5143
5179
fgRemoveRefPred (bTest, block);
5144
- fgAddRefPred (bTest->bbNext , bNewCond);
5145
5180
5146
5181
// Move all predecessor edges that look like loop entry edges to point to the new cloned condition
5147
5182
// block, not the existing condition block. The idea is that if we only move `block` to point to
@@ -5151,15 +5186,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
5151
5186
// as the proxy for predecessors that are "in" versus "out" of the potential loop. Note that correctness
5152
5187
// is maintained no matter which condition block we point to, but we'll lose optimization potential
5153
5188
// (and create spaghetti code) if we get it wrong.
5154
-
5189
+ //
5155
5190
BlockToBlockMap blockMap (getAllocator (CMK_LoopOpt));
5156
5191
bool blockMapInitialized = false ;
5157
5192
5158
- unsigned loopFirstNum = bNewCond-> bbNext ->bbNum ;
5159
- unsigned loopBottomNum = bTest->bbNum ;
5193
+ unsigned const loopFirstNum = bTop ->bbNum ;
5194
+ unsigned const loopBottomNum = bTest->bbNum ;
5160
5195
for (BasicBlock* const predBlock : bTest->PredBlocks ())
5161
5196
{
5162
- unsigned bNum = predBlock->bbNum ;
5197
+ unsigned const bNum = predBlock->bbNum ;
5163
5198
if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum))
5164
5199
{
5165
5200
// Looks like the predecessor is from within the potential loop; skip it.
@@ -5189,8 +5224,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
5189
5224
// cases of stress modes with inconsistent weights.
5190
5225
//
5191
5226
JITDUMP (" Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT " \n " , bTest->bbNum , weightTest,
5192
- weightNext );
5193
- bTest->inheritWeight (block-> bbNext );
5227
+ weightTop );
5228
+ bTest->inheritWeight (bTop );
5194
5229
5195
5230
// Determine the new edge weights.
5196
5231
//
@@ -5200,23 +5235,23 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
5200
5235
// Note "next" is the loop top block, not bTest's bbNext,
5201
5236
// we'll call this latter block "after".
5202
5237
//
5203
- weight_t const testToNextLikelihood = min (1.0 , weightNext / weightTest);
5238
+ weight_t const testToNextLikelihood = min (1.0 , weightTop / weightTest);
5204
5239
weight_t const testToAfterLikelihood = 1.0 - testToNextLikelihood;
5205
5240
5206
- // Adjust edges out of bTest (which now has weight weightNext )
5241
+ // Adjust edges out of bTest (which now has weight weightTop )
5207
5242
//
5208
- weight_t const testToNextWeight = weightNext * testToNextLikelihood;
5209
- weight_t const testToAfterWeight = weightNext * testToAfterLikelihood;
5243
+ weight_t const testToNextWeight = weightTop * testToNextLikelihood;
5244
+ weight_t const testToAfterWeight = weightTop * testToAfterLikelihood;
5210
5245
5211
- FlowEdge* const edgeTestToNext = fgGetPredForBlock (bTest-> bbJumpDest , bTest);
5246
+ FlowEdge* const edgeTestToNext = fgGetPredForBlock (bTop , bTest);
5212
5247
FlowEdge* const edgeTestToAfter = fgGetPredForBlock (bTest->bbNext , bTest);
5213
5248
5214
- JITDUMP (" Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n " , bTest->bbNum ,
5215
- bTest-> bbJumpDest -> bbNum , testToNextWeight);
5249
+ JITDUMP (" Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (iterate loop)\n " , bTest->bbNum , bTop-> bbNum ,
5250
+ testToNextWeight);
5216
5251
JITDUMP (" Setting weight of " FMT_BB " -> " FMT_BB " to " FMT_WT " (exit loop)\n " , bTest->bbNum ,
5217
5252
bTest->bbNext ->bbNum , testToAfterWeight);
5218
5253
5219
- edgeTestToNext->setEdgeWeights (testToNextWeight, testToNextWeight, bTest-> bbJumpDest );
5254
+ edgeTestToNext->setEdgeWeights (testToNextWeight, testToNextWeight, bTop );
5220
5255
edgeTestToAfter->setEdgeWeights (testToAfterWeight, testToAfterWeight, bTest->bbNext );
5221
5256
5222
5257
// Adjust edges out of block, using the same distribution.
0 commit comments