Skip to content

Commit 9b9aeaf

Browse files
authored
JIT: Enable phi-based jump threading when SSA updates aren't needed (#77748)
Leverage the new SSA accounting to look for cases of phi-based jump threading that will not require SSA updates. In particular cases where the phis are all locally consumed. Also update documentation on the SSA checker implementation (which aims to ensure that the SSA accounting we're relying on here is accurate).
1 parent 92971c9 commit 9b9aeaf

File tree

3 files changed

+113
-27
lines changed

3 files changed

+113
-27
lines changed

src/coreclr/jit/compiler.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,12 +217,15 @@ class LclSsaVarDsc
217217
GenTreeOp* m_asg = nullptr;
218218
// The SSA number associated with the previous definition for partial (GTF_USEASG) defs.
219219
unsigned m_useDefSsaNum = SsaConfig::RESERVED_SSA_NUM;
220-
// Number of uses of this SSA def (may be an over-estimate). Includes phi args uses.
220+
// Number of uses of this SSA def (may be an over-estimate).
221+
// May not be accurate for for promoted fields.
221222
unsigned short m_numUses = 0;
222223
// True if there may be phi args uses of this def
223-
// (false implies all uses are non-phi)
224+
// May not be accurate for for promoted fields.
225+
// (false implies all uses are non-phi).
224226
bool m_hasPhiUse = false;
225-
// True if there may be uses of the def in a different block
227+
// True if there may be uses of the def in a different block.
228+
// May not be accurate for for promoted fields.
226229
bool m_hasGlobalUse = false;
227230

228231
public:

src/coreclr/jit/fgdiagnostic.cpp

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3583,12 +3583,40 @@ void Compiler::fgDebugCheckNodesUniqueness()
35833583
}
35843584
}
35853585

3586-
// SsaCheckWalker keeps data that is necessary to check if
3587-
// we are properly updating SSA uses and defs.
3586+
//------------------------------------------------------------------------------
3587+
// SsaCheckVisitor: build and maintain state about SSA uses in the IR
3588+
//
3589+
// Expects to be invoked on each root expression in each basic block that
3590+
// SSA renames (note SSA will not rename defs and uses in unreachable blocks)
3591+
// and all blocks created after SSA was built (identified by bbID).
3592+
//
3593+
// Maintains a hash table keyed by (lclNum, ssaNum) that tracks information
3594+
// about that SSA lifetime. This information is updated by each SSA use and
3595+
// def seen in the trees via ProcessUses and ProcessDefs.
3596+
//
3597+
// We can spot certain errors during collection, if local occurrences either
3598+
// unexpectedy lack or have SSA numbers.
3599+
//
3600+
// Once collection is done, DoChecks() verifies that the collected information
3601+
// is soundly approximated by the data stored in the LclSsaVarDsc entries.
3602+
//
3603+
// In particular the properties claimed for an SSA lifetime via its
3604+
// LclSsaVarDsc must be accurate or an over-estimate. We tolerate over-estimates
3605+
// as there is no good mechanism in the jit for keeping track when bits of IR
3606+
// are deleted, so over time the number and kind of uses indicated in the
3607+
// LclSsaVarDsc may show more uses and more different kinds of uses then actually
3608+
// remain in the IR.
3609+
//
3610+
// One important caveat is that for promoted locals there may be implicit uses
3611+
// (via the parent var) that do not get numbered by SSA. Neither the LclSsaVarDsc
3612+
// nor the IR will track these implicit uses. So the checking done below will
3613+
// only catch anomalies in the defs or in the explicit uses.
35883614
//
35893615
class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
35903616
{
35913617
private:
3618+
// Hash key for tracking per-SSA lifetime info
3619+
//
35923620
struct SsaKey
35933621
{
35943622
private:
@@ -3624,6 +3652,8 @@ class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
36243652
}
36253653
};
36263654

3655+
// Per-SSA lifetime info
3656+
//
36273657
struct SsaInfo
36283658
{
36293659
private:
@@ -3949,30 +3979,39 @@ class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
39493979
{
39503980
for (unsigned lclNum = 0; lclNum < m_compiler->lvaCount; lclNum++)
39513981
{
3982+
// Check each local in SSA
3983+
//
39523984
LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum);
39533985

39543986
if (!varDsc->lvInSsa)
39553987
{
39563988
continue;
39573989
}
39583990

3991+
// Check each SSA lifetime of that local
3992+
//
39593993
const SsaDefArray<LclSsaVarDsc>& ssaDefs = varDsc->lvPerSsaData;
39603994

39613995
for (unsigned i = 0; i < ssaDefs.GetCount(); i++)
39623996
{
39633997
LclSsaVarDsc* const ssaVarDsc = ssaDefs.GetSsaDefByIndex(i);
39643998
const unsigned ssaNum = ssaDefs.GetSsaNum(ssaVarDsc);
39653999

4000+
// Find the SSA info we gathered for this lifetime via the IR walk
4001+
//
39664002
SsaKey key(lclNum, ssaNum);
39674003
SsaInfo* ssaInfo = nullptr;
39684004

39694005
if (!m_infoMap.Lookup(key, &ssaInfo))
39704006
{
3971-
// Possibly there are no more references to this local.
4007+
// IR has no information about this lifetime.
4008+
// Possibly there are no more references.
4009+
//
39724010
continue;
39734011
}
39744012

3975-
// VarDsc block should be accurate.
4013+
// Now cross-check the gathered ssaInfo vs the LclSsaVarDsc.
4014+
// LclSsaVarDsc should have the correct def block
39764015
//
39774016
BasicBlock* const ssaInfoDefBlock = ssaInfo->GetDefBlock();
39784017
BasicBlock* const ssaVarDscDefBlock = ssaVarDsc->GetBlock();
@@ -3999,7 +4038,7 @@ class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
39994038
unsigned const ssaInfoUses = ssaInfo->GetNumUses();
40004039
unsigned const ssaVarDscUses = ssaVarDsc->GetNumUses();
40014040

4002-
// VarDsc use count must be accurate or an over-estimate
4041+
// LclSsaVarDsc use count must be accurate or an over-estimate
40034042
//
40044043
if (ssaInfoUses > ssaVarDscUses)
40054044
{
@@ -4015,7 +4054,7 @@ class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
40154054
ssaVarDscUses);
40164055
}
40174056

4018-
// HasPhiUse use must be accurate or an over-estimate
4057+
// LclSsaVarDsc HasPhiUse use must be accurate or an over-estimate
40194058
//
40204059
if (ssaInfo->HasPhiUse() && !ssaVarDsc->HasPhiUse())
40214060
{
@@ -4027,7 +4066,7 @@ class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
40274066
JITDUMP("[info] HasPhiUse overestimated for V%02u.%u\n", lclNum, ssaNum);
40284067
}
40294068

4030-
// HasGlobalUse use must be accurate or an over-estimate
4069+
// LclSsaVarDsc HasGlobalUse use must be accurate or an over-estimate
40314070
//
40324071
if (ssaInfo->HasGlobalUse() && !ssaVarDsc->HasGlobalUse())
40334072
{
@@ -4058,9 +4097,9 @@ class SsaCheckVisitor : public GenTreeVisitor<SsaCheckVisitor>
40584097
// * There is at most one SSA def for a given SSA num, and it is in the expected block.
40594098
// * Operands that should have SSA numbers have them
40604099
// * Operands that should not have SSA numbers do not have them
4061-
// * The number of SSA uses is accurate or an over-estimate
4062-
// * The local/global bit is properly set or an over-estimate
4063-
// * The has phi use bit is properly set or an over-estimate
4100+
// * GetNumUses is accurate or an over-estimate
4101+
// * HasGlobalUse is properly set or an over-estimate
4102+
// * HasPhiUse is properly set or an over-estimate
40644103
//
40654104
// Todo:
40664105
// * Try and sanity check PHIs
@@ -4077,6 +4116,7 @@ void Compiler::fgDebugCheckSsa()
40774116
assert(fgDomsComputed);
40784117

40794118
// This class visits the flow graph the same way the SSA builder does.
4119+
// In particular it may skip over blocks that SSA did not rename.
40804120
//
40814121
class SsaCheckDomTreeVisitor : public DomTreeVisitor<SsaCheckDomTreeVisitor>
40824122
{
@@ -4099,13 +4139,13 @@ void Compiler::fgDebugCheckSsa()
40994139
}
41004140
};
41014141

4102-
// Visit the blocks SSA modified
4142+
// Visit the blocks that SSA intially renamed
41034143
//
41044144
SsaCheckVisitor scv(this);
41054145
SsaCheckDomTreeVisitor visitor(this, scv);
41064146
visitor.WalkTree();
41074147

4108-
// Also visit any blocks added afterwards
4148+
// Also visit any blocks added after SSA was built
41094149
//
41104150
for (BasicBlock* const block : Blocks())
41114151
{
@@ -4115,7 +4155,8 @@ void Compiler::fgDebugCheckSsa()
41154155
}
41164156
}
41174157

4118-
// Cross-check the information gathered from IR against the info in the SSA table
4158+
// Cross-check the information gathered from IR against the info
4159+
// in the LclSsaVarDscs.
41194160
//
41204161
scv.DoChecks();
41214162

src/coreclr/jit/redundantbranchopts.cpp

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -657,12 +657,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
657657
// We were unable to determine the relop value via dominance checks.
658658
// See if we can jump thread via phi disambiguation.
659659
//
660-
// optJumpThreadPhi disabled as it is exposing problems with stale SSA.
661-
// See issue #76636 and related.
662-
//
663-
// return optJumpThreadPhi(block, tree, treeNormVN);
664-
665-
return false;
660+
return optJumpThreadPhi(block, tree, treeNormVN);
666661
}
667662

668663
// Be conservative if there is an exception effect and we're in an EH region
@@ -815,21 +810,68 @@ bool Compiler::optJumpThreadCheck(BasicBlock* const block, BasicBlock* const dom
815810
// Since flow is going to bypass block, make sure there
816811
// is nothing in block that can cause a side effect.
817812
//
818-
// Note we neglect PHI assignments. This reflects a general lack of
819-
// SSA update abilities in the jit. We really should update any uses
820-
// of PHIs defined here with the corresponding PHI input operand.
813+
// For non-PHI RBO, we neglect PHI assignments. This can leave SSA
814+
// in an incorrect state but so far it has not yet caused problems.
821815
//
822-
// TODO: if block has side effects, for those predecessors that are
816+
// For PHI-based RBO we need to be more cautious and insist that
817+
// any PHI is locally consumed, so that if we bypass the block we
818+
// don't need to make SSA updates.
819+
//
820+
// TODO: handle blocks with side effects. For those predecessors that are
823821
// favorable (ones that don't reach block via a critical edge), consider
824822
// duplicating block's IR into the predecessor. This is the jump threading
825823
// analog of the optimization we encourage via fgOptimizeUncondBranchToSimpleCond.
826824
//
827825
Statement* const lastStmt = block->lastStmt();
826+
bool const isPhiRBO = (domBlock == nullptr);
828827

829-
for (Statement* const stmt : block->NonPhiStatements())
828+
for (Statement* const stmt : block->Statements())
830829
{
831830
GenTree* const tree = stmt->GetRootNode();
832831

832+
// If we are doing PHI-based RBO then all local PHIs must be locally consumed.
833+
//
834+
if (stmt->IsPhiDefnStmt())
835+
{
836+
if (isPhiRBO)
837+
{
838+
GenTreeLclVarCommon* const phiDef = tree->AsOp()->gtGetOp1()->AsLclVarCommon();
839+
unsigned const lclNum = phiDef->GetLclNum();
840+
unsigned const ssaNum = phiDef->GetSsaNum();
841+
LclVarDsc* const varDsc = lvaGetDesc(lclNum);
842+
843+
// We do not put implicit uses of promoted local fields into SSA.
844+
// So assume the worst here, that there is some implicit use of this ssa
845+
// def we don't know about.
846+
//
847+
if (varDsc->lvIsStructField)
848+
{
849+
JITDUMP(FMT_BB " has phi for promoted field V%02u.%u; no phi-based threading\n", block->bbNum,
850+
lclNum, ssaNum);
851+
return false;
852+
}
853+
854+
LclSsaVarDsc* const ssaVarDsc = varDsc->GetPerSsaData(ssaNum);
855+
856+
// Bypassing a global use might require SSA updates.
857+
// Note a phi use is ok if it's local (self loop)
858+
//
859+
if (ssaVarDsc->HasGlobalUse())
860+
{
861+
JITDUMP(FMT_BB " has global phi for V%02u.%u; no phi-based threading\n", block->bbNum, lclNum,
862+
ssaNum);
863+
return false;
864+
}
865+
}
866+
867+
// We are either not doing PHI-based RBO or this PHI won't cause
868+
// problems. Carry on.
869+
//
870+
continue;
871+
}
872+
873+
// This is a "real" statement.
874+
//
833875
// We can ignore exception side effects in the jump tree.
834876
//
835877
// They are covered by the exception effects in the dominating compare.

0 commit comments

Comments
 (0)