Skip to content

Commit 4b5491e

Browse files
authored
JIT: use synthesis to repair some reconstruction issues (#84312)
In particular, run synthesis in repair mode for cases where there are profile counts within the method but zero counts in `fgFirstBB`. Recall that sparse profiling effectively probes return blocks to determine the method entry count. So the zero-entry but not zero-everywhere case can happen if we have a method with a very long-running loop plus sparse profiling plus OSR -- we will only get profile counts from the instrumented Tier0 method, and it will never return (instead it will always escape to an OSR version which will eventually return, but that version won't be instrumented). I originally was a bit more ambitious and ran repair for a broader set of reconstruction issues, but that led to a large number of diffs, in part because repair doesn't cope well with irreducible loops. Leaving the entry count zero can have a fairly disastrous impact on the quality of optimizations done in the method. Addresses quite a few of the worst-performing benchmarks in #84264.
1 parent 70d00e4 commit 4b5491e

File tree

3 files changed

+73
-59
lines changed

3 files changed

+73
-59
lines changed

src/coreclr/jit/compiler.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5703,8 +5703,8 @@ class Compiler
57035703
PhaseStatus fgPrepareToInstrumentMethod();
57045704
PhaseStatus fgInstrumentMethod();
57055705
PhaseStatus fgIncorporateProfileData();
5706-
void fgIncorporateBlockCounts();
5707-
void fgIncorporateEdgeCounts();
5706+
bool fgIncorporateBlockCounts();
5707+
bool fgIncorporateEdgeCounts();
57085708

57095709
public:
57105710
const char* fgPgoFailReason;

src/coreclr/jit/fgprofile.cpp

Lines changed: 67 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2606,18 +2606,35 @@ PhaseStatus Compiler::fgIncorporateProfileData()
26062606

26072607
fgPgoHaveWeights = haveBlockCounts || haveEdgeCounts;
26082608

2609-
// We expect not to have both block and edge counts. We may have other
2610-
// forms of profile data even if we do not have any counts.
2611-
//
2612-
assert(!haveBlockCounts || !haveEdgeCounts);
2613-
2614-
if (haveBlockCounts)
2609+
if (fgPgoHaveWeights)
26152610
{
2616-
fgIncorporateBlockCounts();
2617-
}
2618-
else if (haveEdgeCounts)
2619-
{
2620-
fgIncorporateEdgeCounts();
2611+
// We expect not to have both block and edge counts. We may have other
2612+
// forms of profile data even if we do not have any counts.
2613+
//
2614+
assert(!haveBlockCounts || !haveEdgeCounts);
2615+
2616+
bool dataIsGood = false;
2617+
2618+
if (haveBlockCounts)
2619+
{
2620+
dataIsGood = fgIncorporateBlockCounts();
2621+
}
2622+
else if (haveEdgeCounts)
2623+
{
2624+
dataIsGood = fgIncorporateEdgeCounts();
2625+
}
2626+
2627+
// Profile incorporation may have tossed out all PGO data if it
2628+
// encountered major issues. This is perhaps too drastic. Consider
2629+
// at least keeping the class profile data, or perhaps enable full synthesis.
2630+
//
2631+
// If profile incorporation hit fixable problems, run synthesis in repair mode.
2632+
//
2633+
if (fgPgoHaveWeights && !dataIsGood)
2634+
{
2635+
JITDUMP("\nIncorporated count data had inconsistencies; repairing profile...\n");
2636+
ProfileSynthesis::Run(this, ProfileSynthesisOption::RepairLikelihoods);
2637+
}
26212638
}
26222639

26232640
#ifdef DEBUG
@@ -2667,6 +2684,9 @@ void Compiler::fgSetProfileWeight(BasicBlock* block, weight_t profileWeight)
26672684
// fgIncorporateBlockCounts: read block count based profile data
26682685
// and set block weights
26692686
//
2687+
// Returns:
2688+
// True if data is in good shape
2689+
//
26702690
// Notes:
26712691
// Since we are now running before the importer, we do not know which
26722692
// blocks will be imported, and we should not see any internal blocks.
@@ -2680,7 +2700,7 @@ void Compiler::fgSetProfileWeight(BasicBlock* block, weight_t profileWeight)
26802700
// Find some other mechanism for handling cases where handler entry
26812701
// blocks must be in the hot section.
26822702
//
2683-
void Compiler::fgIncorporateBlockCounts()
2703+
bool Compiler::fgIncorporateBlockCounts()
26842704
{
26852705
for (BasicBlock* const block : Blocks())
26862706
{
@@ -2691,6 +2711,10 @@ void Compiler::fgIncorporateBlockCounts()
26912711
fgSetProfileWeight(block, profileWeight);
26922712
}
26932713
}
2714+
2715+
// For now assume data is always good.
2716+
//
2717+
return true;
26942718
}
26952719

26962720
//------------------------------------------------------------------------
@@ -2871,6 +2895,7 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor
28712895
bool m_negativeCount;
28722896
bool m_failedToConverge;
28732897
bool m_allWeightsZero;
2898+
bool m_entryWeightZero;
28742899

28752900
public:
28762901
EfficientEdgeCountReconstructor(Compiler* comp)
@@ -2889,6 +2914,7 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor
28892914
, m_negativeCount(false)
28902915
, m_failedToConverge(false)
28912916
, m_allWeightsZero(true)
2917+
, m_entryWeightZero(false)
28922918
{
28932919
}
28942920

@@ -2916,6 +2942,24 @@ class EfficientEdgeCountReconstructor : public SpanningTreeVisitor
29162942
m_failedToConverge = true;
29172943
}
29182944

2945+
void EntryWeightZero()
2946+
{
2947+
m_entryWeightZero = true;
2948+
}
2949+
2950+
// Are there reparable issues with the reconstruction?
2951+
//
2952+
// Ideally we'd also have && !m_negativeCount here, but this
2953+
// leads to lots of diffs in async methods.
2954+
//
2955+
// Looks like we might first need to resolve reconstruction
2956+
// shortcomings with irreducible loops.
2957+
//
2958+
bool IsGood() const
2959+
{
2960+
return !m_entryWeightZero;
2961+
}
2962+
29192963
void VisitBlock(BasicBlock*) override
29202964
{
29212965
}
@@ -3381,52 +3425,15 @@ void EfficientEdgeCountReconstructor::Solve()
33813425

33823426
JITDUMP("\nSolver: converged in %u passes\n", nPasses);
33833427

3384-
// If, after solving, the entry weight ends up as zero, set it to
3385-
// the max of the weight of successor edges or join-free successor
3386-
// block weight. We do this so we can determine a plausible scale
3387-
// count.
3388-
//
3389-
// This can happen for methods that do not return (say they always
3390-
// throw, or had not yet returned when we snapped the counts).
3391-
//
3392-
// Note we know there are nonzero counts elsewhere in the method, otherwise
3393-
// m_allWeightsZero would be true and we would have bailed out above.
3428+
// If, after solving, the entry weight ends up as zero, note
3429+
// this so we can run a profile repair immediately.
33943430
//
33953431
BlockInfo* const firstInfo = BlockToInfo(m_comp->fgFirstBB);
33963432
if (firstInfo->m_weight == BB_ZERO_WEIGHT)
33973433
{
33983434
assert(!m_allWeightsZero);
3399-
3400-
weight_t newWeight = BB_ZERO_WEIGHT;
3401-
3402-
for (Edge* edge = firstInfo->m_outgoingEdges; edge != nullptr; edge = edge->m_nextOutgoingEdge)
3403-
{
3404-
if (edge->m_weightKnown)
3405-
{
3406-
newWeight = max(newWeight, edge->m_weight);
3407-
}
3408-
3409-
BlockInfo* const targetBlockInfo = BlockToInfo(edge->m_targetBlock);
3410-
Edge* const targetBlockEdges = targetBlockInfo->m_incomingEdges;
3411-
3412-
if (targetBlockInfo->m_weightKnown && (targetBlockEdges->m_nextIncomingEdge == nullptr))
3413-
{
3414-
newWeight = max(newWeight, targetBlockInfo->m_weight);
3415-
}
3416-
}
3417-
3418-
if (newWeight == BB_ZERO_WEIGHT)
3419-
{
3420-
// TODO -- throw out profile data or trigger repair/synthesis.
3421-
//
3422-
JITDUMP("Entry block weight and neighborhood was zero\n");
3423-
}
3424-
else
3425-
{
3426-
JITDUMP("Entry block weight was zero, setting entry weight to neighborhood max " FMT_WT "\n", newWeight);
3427-
}
3428-
3429-
firstInfo->m_weight = newWeight;
3435+
JITDUMP("\nSolver: entry block weight is zero\n");
3436+
EntryWeightZero();
34303437
}
34313438
}
34323439

@@ -3436,9 +3443,6 @@ void EfficientEdgeCountReconstructor::Solve()
34363443
//
34373444
void EfficientEdgeCountReconstructor::Propagate()
34383445
{
3439-
// We don't expect mismatches or convergence failures.
3440-
//
3441-
34423446
// Mismatches are currently expected as the flow for static pgo doesn't prevent them now.
34433447
// assert(!m_mismatch);
34443448

@@ -3939,6 +3943,10 @@ void EfficientEdgeCountReconstructor::MarkInterestingSwitches(BasicBlock* block,
39393943
// fgIncorporateEdgeCounts: read sparse edge count based profile data
39403944
// and set block weights
39413945
//
3946+
// Returns:
3947+
// true if incorporated profile is in good shape (consistent, etc).
3948+
// false if some repair seems necessary
3949+
//
39423950
// Notes:
39433951
// Because edge counts are sparse, we need to solve for the missing
39443952
// edge counts; in the process, we also determine block counts.
@@ -3948,7 +3956,7 @@ void EfficientEdgeCountReconstructor::MarkInterestingSwitches(BasicBlock* block,
39483956
// Since we have edge weights here, we might as well set them
39493957
// (or likelihoods)
39503958
//
3951-
void Compiler::fgIncorporateEdgeCounts()
3959+
bool Compiler::fgIncorporateEdgeCounts()
39523960
{
39533961
JITDUMP("\nReconstructing block counts from sparse edge instrumentation\n");
39543962

@@ -3957,6 +3965,8 @@ void Compiler::fgIncorporateEdgeCounts()
39573965
WalkSpanningTree(&e);
39583966
e.Solve();
39593967
e.Propagate();
3968+
3969+
return e.IsGood();
39603970
}
39613971

39623972
//------------------------------------------------------------------------

src/coreclr/jit/fgprofilesynthesis.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option)
6464
RandomizeLikelihoods();
6565
break;
6666

67+
case ProfileSynthesisOption::RepairLikelihoods:
68+
RepairLikelihoods();
69+
break;
70+
6771
default:
6872
assert(!"unexpected profile synthesis option");
6973
break;

0 commit comments

Comments
 (0)