Skip to content

Commit 8b2dc46

Browse files
authored
JIT: Insert writebacks more eagerly (#87869)
Before starting replacements within a statement we now look for physically promoted struct locals passed as call args and write those back if necessary. This has two benefits: 1. It avoids creating a lot of comma nodes that we would otherwise create 2. More importantly, it allows morph's last-use copy omission to kick in because we no longer have complex trees as the argument. Fix #87471
1 parent e2c1534 commit 8b2dc46

File tree

3 files changed

+215
-89
lines changed

3 files changed

+215
-89
lines changed

src/coreclr/jit/promotion.cpp

Lines changed: 205 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1948,52 +1948,8 @@ void ReplaceVisitor::StartStatement(Statement* stmt)
19481948
m_madeChanges = false;
19491949
m_mayHaveForwardSub = false;
19501950

1951-
if (m_numPendingReadBacks == 0)
1952-
{
1953-
return;
1954-
}
1955-
1956-
// If we have pending readbacks then insert them as new statements for any
1957-
// local that the statement is using. We could leave this up to ReplaceLocal
1958-
// but do it here for three reasons:
1959-
// 1. For QMARKs we cannot actually leave it up to ReplaceLocal since the
1960-
// local may be conditionally executed
1961-
// 2. This allows forward-sub to kick in
1962-
// 3. Creating embedded stores in ReplaceLocal disables local copy prop for
1963-
// that local (see ReplaceLocal).
1964-
1965-
for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
1966-
{
1967-
if (lcl->TypeIs(TYP_STRUCT))
1968-
{
1969-
continue;
1970-
}
1971-
1972-
AggregateInfo* agg = m_aggregates[lcl->GetLclNum()];
1973-
if (agg == nullptr)
1974-
{
1975-
continue;
1976-
}
1977-
1978-
size_t index = Promotion::BinarySearch<Replacement, &Replacement::Offset>(agg->Replacements, lcl->GetLclOffs());
1979-
if ((ssize_t)index < 0)
1980-
{
1981-
continue;
1982-
}
1983-
1984-
Replacement& rep = agg->Replacements[index];
1985-
if (rep.NeedsReadBack)
1986-
{
1987-
JITDUMP("Reading back replacement V%02u.[%03u..%03u) -> V%02u before [%06u]:\n", agg->LclNum, rep.Offset,
1988-
rep.Offset + genTypeSize(rep.AccessType), rep.LclNum, Compiler::dspTreeID(stmt->GetRootNode()));
1989-
1990-
GenTree* readBack = Promotion::CreateReadBack(m_compiler, agg->LclNum, rep);
1991-
Statement* stmt = m_compiler->fgNewStmtFromTree(readBack);
1992-
DISPSTMT(stmt);
1993-
m_compiler->fgInsertStmtBefore(m_currentBlock, m_currentStmt, stmt);
1994-
ClearNeedsReadBack(rep);
1995-
}
1996-
}
1951+
InsertPreStatementWriteBacks();
1952+
InsertPreStatementReadBacks();
19971953
}
19981954

19991955
//------------------------------------------------------------------------
@@ -2012,7 +1968,7 @@ Compiler::fgWalkResult ReplaceVisitor::PostOrderVisit(GenTree** use, GenTree* us
20121968
{
20131969
GenTree* tree = *use;
20141970

2015-
use = InsertMidTreeReadBacksIfNecessary(use);
1971+
use = InsertMidTreeReadBacks(use);
20161972

20171973
if (tree->OperIsStore())
20181974
{
@@ -2108,7 +2064,162 @@ void ReplaceVisitor::ClearNeedsReadBack(Replacement& rep)
21082064
}
21092065

21102066
//------------------------------------------------------------------------
2111-
// InsertMidTreeReadBacksIfNecessary:
2067+
// InsertPreStatementReadBacks:
2068+
// Insert readbacks before starting the current statement.
2069+
//
2070+
void ReplaceVisitor::InsertPreStatementReadBacks()
2071+
{
2072+
if (m_numPendingReadBacks <= 0)
2073+
{
2074+
return;
2075+
}
2076+
2077+
// If we have pending readbacks then insert them as new statements for any
2078+
// local that the statement is using. We could leave this up to ReplaceLocal
2079+
// but do it here for three reasons:
2080+
// 1. For QMARKs we cannot actually leave it up to ReplaceLocal since the
2081+
// local may be conditionally executed
2082+
// 2. This allows forward-sub to kick in
2083+
// 3. Creating embedded stores in ReplaceLocal disables local copy prop for
2084+
// that local (see ReplaceLocal).
2085+
2086+
for (GenTreeLclVarCommon* lcl : m_currentStmt->LocalsTreeList())
2087+
{
2088+
if (lcl->TypeIs(TYP_STRUCT))
2089+
{
2090+
continue;
2091+
}
2092+
2093+
AggregateInfo* agg = m_aggregates[lcl->GetLclNum()];
2094+
if (agg == nullptr)
2095+
{
2096+
continue;
2097+
}
2098+
2099+
size_t index = Promotion::BinarySearch<Replacement, &Replacement::Offset>(agg->Replacements, lcl->GetLclOffs());
2100+
if ((ssize_t)index < 0)
2101+
{
2102+
continue;
2103+
}
2104+
2105+
Replacement& rep = agg->Replacements[index];
2106+
if (rep.NeedsReadBack)
2107+
{
2108+
JITDUMP("Reading back replacement V%02u.[%03u..%03u) -> V%02u before [%06u]:\n", agg->LclNum, rep.Offset,
2109+
rep.Offset + genTypeSize(rep.AccessType), rep.LclNum,
2110+
Compiler::dspTreeID(m_currentStmt->GetRootNode()));
2111+
2112+
GenTree* readBack = Promotion::CreateReadBack(m_compiler, agg->LclNum, rep);
2113+
Statement* stmt = m_compiler->fgNewStmtFromTree(readBack);
2114+
DISPSTMT(stmt);
2115+
m_compiler->fgInsertStmtBefore(m_currentBlock, m_currentStmt, stmt);
2116+
ClearNeedsReadBack(rep);
2117+
}
2118+
}
2119+
}
2120+
2121+
//------------------------------------------------------------------------
2122+
// VisitOverlappingReplacements:
2123+
// Call a function for every replacement that overlaps a specified segment.
2124+
//
2125+
// Parameters:
2126+
// lcl - The local
2127+
// offs - Start offset of the segment
2128+
// size - Size of the segment
2129+
// func - Callback
2130+
//
2131+
template <typename Func>
2132+
void ReplaceVisitor::VisitOverlappingReplacements(unsigned lcl, unsigned offs, unsigned size, Func func)
2133+
{
2134+
if (m_aggregates[lcl] == nullptr)
2135+
{
2136+
return;
2137+
}
2138+
2139+
jitstd::vector<Replacement>& replacements = m_aggregates[lcl]->Replacements;
2140+
size_t index = Promotion::BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
2141+
2142+
if ((ssize_t)index < 0)
2143+
{
2144+
index = ~index;
2145+
if ((index > 0) && replacements[index - 1].Overlaps(offs, size))
2146+
{
2147+
index--;
2148+
}
2149+
}
2150+
2151+
unsigned end = offs + size;
2152+
while ((index < replacements.size()) && (replacements[index].Offset < end))
2153+
{
2154+
Replacement& rep = replacements[index];
2155+
func(rep);
2156+
2157+
index++;
2158+
}
2159+
}
2160+
2161+
//------------------------------------------------------------------------
2162+
// InsertPreStatementWriteBacks:
2163+
// Write back promoted fields for the upcoming statement if it may be
2164+
// beneficial to do so.
2165+
//
2166+
void ReplaceVisitor::InsertPreStatementWriteBacks()
2167+
{
2168+
GenTree* rootNode = m_currentStmt->GetRootNode();
2169+
if ((rootNode->gtFlags & GTF_CALL) == 0)
2170+
{
2171+
return;
2172+
}
2173+
2174+
class Visitor : public GenTreeVisitor<Visitor>
2175+
{
2176+
ReplaceVisitor* m_replacer;
2177+
2178+
public:
2179+
enum
2180+
{
2181+
DoPreOrder = true,
2182+
};
2183+
2184+
Visitor(Compiler* comp, ReplaceVisitor* replacer) : GenTreeVisitor(comp), m_replacer(replacer)
2185+
{
2186+
}
2187+
2188+
fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
2189+
{
2190+
GenTree* node = *use;
2191+
if ((node->gtFlags & GTF_CALL) == 0)
2192+
{
2193+
return fgWalkResult::WALK_SKIP_SUBTREES;
2194+
}
2195+
2196+
if (node->IsCall())
2197+
{
2198+
GenTreeCall* call = node->AsCall();
2199+
for (CallArg& arg : call->gtArgs.Args())
2200+
{
2201+
GenTree* node = arg.GetNode()->gtEffectiveVal();
2202+
if (!node->TypeIs(TYP_STRUCT) || !node->OperIsLocalRead())
2203+
{
2204+
continue;
2205+
}
2206+
2207+
GenTreeLclVarCommon* lcl = node->AsLclVarCommon();
2208+
m_replacer->WriteBackBeforeCurrentStatement(lcl->GetLclNum(), lcl->GetLclOffs(),
2209+
lcl->GetLayout(m_compiler)->GetSize());
2210+
}
2211+
}
2212+
2213+
return fgWalkResult::WALK_CONTINUE;
2214+
}
2215+
};
2216+
2217+
Visitor visitor(m_compiler, this);
2218+
visitor.WalkTree(m_currentStmt->GetRootNodePointer(), nullptr);
2219+
}
2220+
2221+
//------------------------------------------------------------------------
2222+
// InsertMidTreeReadBacks:
21122223
// If necessary, insert IR to read back all replacements before the specified use.
21132224
//
21142225
// Parameters:
@@ -2128,7 +2239,7 @@ void ReplaceVisitor::ClearNeedsReadBack(Replacement& rep)
21282239
// try-region (or filter block) and we find a tree that may throw it eagerly
21292240
// inserts pending readbacks.
21302241
//
2131-
GenTree** ReplaceVisitor::InsertMidTreeReadBacksIfNecessary(GenTree** use)
2242+
GenTree** ReplaceVisitor::InsertMidTreeReadBacks(GenTree** use)
21322243
{
21332244
if ((m_numPendingReadBacks == 0) || !m_compiler->ehBlockHasExnFlowDsc(m_currentBlock))
21342245
{
@@ -2305,9 +2416,9 @@ void ReplaceVisitor::ReplaceLocal(GenTree** use, GenTree* user)
23052416

23062417
assert(effectiveUser->OperIs(GT_CALL, GT_RETURN));
23072418
unsigned size = lcl->GetLayout(m_compiler)->GetSize();
2308-
WriteBackBefore(use, lclNum, lcl->GetLclOffs(), size);
2419+
WriteBackBeforeUse(use, lclNum, lcl->GetLclOffs(), size);
23092420

2310-
if ((m_aggregates[lclNum] != nullptr) && IsPromotedStructLocalDying(lcl))
2421+
if (IsPromotedStructLocalDying(lcl))
23112422
{
23122423
lcl->gtFlags |= GTF_VAR_DEATH;
23132424
CheckForwardSubForLastUse(lclNum);
@@ -2361,8 +2472,8 @@ void ReplaceVisitor::ReplaceLocal(GenTree** use, GenTree* user)
23612472
else if (rep.NeedsReadBack)
23622473
{
23632474
// This is an uncommon case -- typically all readbacks are handled in
2364-
// ReplaceVisitor::StartStatement. This case is still needed to handle
2365-
// the situation where the readback was marked previously in this tree
2475+
// InsertPreStatementReadBacks. This case is still needed to handle the
2476+
// situation where the readback was marked previously in this tree
23662477
// (e.g. due to a COMMA).
23672478

23682479
JITDUMP(" ..needs a read back\n");
@@ -2426,7 +2537,34 @@ void ReplaceVisitor::CheckForwardSubForLastUse(unsigned lclNum)
24262537
}
24272538

24282539
//------------------------------------------------------------------------
2429-
// WriteBackBefore:
2540+
// WriteBackBeforeCurrentStatement:
2541+
// Insert statements before the current that write back all overlapping
2542+
// replacements.
2543+
//
2544+
// Parameters:
2545+
// lcl - The struct local
2546+
// offs - The starting offset into the struct local of the overlapping range to write back to
2547+
// size - The size of the overlapping range
2548+
//
2549+
void ReplaceVisitor::WriteBackBeforeCurrentStatement(unsigned lcl, unsigned offs, unsigned size)
2550+
{
2551+
VisitOverlappingReplacements(lcl, offs, size, [this, lcl](Replacement& rep) {
2552+
if (!rep.NeedsWriteBack)
2553+
{
2554+
return;
2555+
}
2556+
2557+
GenTree* readBack = Promotion::CreateWriteBack(m_compiler, lcl, rep);
2558+
Statement* stmt = m_compiler->fgNewStmtFromTree(readBack);
2559+
JITDUMP("Writing back %s before " FMT_STMT "\n", rep.Description, stmt->GetID());
2560+
DISPSTMT(stmt);
2561+
m_compiler->fgInsertStmtBefore(m_currentBlock, m_currentStmt, stmt);
2562+
ClearNeedsWriteBack(rep);
2563+
});
2564+
}
2565+
2566+
//------------------------------------------------------------------------
2567+
// WriteBackBeforeUse:
24302568
// Update the use with IR that writes back all necessary overlapping
24312569
// replacements into a struct local.
24322570
//
@@ -2436,42 +2574,22 @@ void ReplaceVisitor::CheckForwardSubForLastUse(unsigned lclNum)
24362574
// offs - The starting offset into the struct local of the overlapping range to write back to
24372575
// size - The size of the overlapping range
24382576
//
2439-
void ReplaceVisitor::WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size)
2577+
void ReplaceVisitor::WriteBackBeforeUse(GenTree** use, unsigned lcl, unsigned offs, unsigned size)
24402578
{
2441-
if (m_aggregates[lcl] == nullptr)
2442-
{
2443-
return;
2444-
}
2445-
2446-
jitstd::vector<Replacement>& replacements = m_aggregates[lcl]->Replacements;
2447-
size_t index = Promotion::BinarySearch<Replacement, &Replacement::Offset>(replacements, offs);
2448-
2449-
if ((ssize_t)index < 0)
2450-
{
2451-
index = ~index;
2452-
if ((index > 0) && replacements[index - 1].Overlaps(offs, size))
2579+
VisitOverlappingReplacements(lcl, offs, size, [this, &use, lcl](Replacement& rep) {
2580+
if (!rep.NeedsWriteBack)
24532581
{
2454-
index--;
2582+
return;
24552583
}
2456-
}
2457-
2458-
unsigned end = offs + size;
2459-
while ((index < replacements.size()) && (replacements[index].Offset < end))
2460-
{
2461-
Replacement& rep = replacements[index];
2462-
if (rep.NeedsWriteBack)
2463-
{
2464-
GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(),
2465-
Promotion::CreateWriteBack(m_compiler, lcl, rep), *use);
2466-
*use = comma;
2467-
use = &comma->gtOp2;
24682584

2469-
ClearNeedsWriteBack(rep);
2470-
m_madeChanges = true;
2471-
}
2585+
GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(),
2586+
Promotion::CreateWriteBack(m_compiler, lcl, rep), *use);
2587+
*use = comma;
2588+
use = &comma->gtOp2;
24722589

2473-
index++;
2474-
}
2590+
ClearNeedsWriteBack(rep);
2591+
m_madeChanges = true;
2592+
});
24752593
}
24762594

24772595
//------------------------------------------------------------------------
@@ -2621,9 +2739,10 @@ PhaseStatus Promotion::Run()
26212739

26222740
for (Statement* stmt : bb->Statements())
26232741
{
2742+
replacer.StartStatement(stmt);
2743+
26242744
DISPSTMT(stmt);
26252745

2626-
replacer.StartStatement(stmt);
26272746
replacer.WalkTree(stmt->GetRootNodePointer(), nullptr);
26282747

26292748
if (replacer.MadeChanges())

src/coreclr/jit/promotion.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,12 +297,19 @@ class ReplaceVisitor : public GenTreeVisitor<ReplaceVisitor>
297297
void SetNeedsReadBack(Replacement& rep);
298298
void ClearNeedsReadBack(Replacement& rep);
299299

300-
GenTree** InsertMidTreeReadBacksIfNecessary(GenTree** use);
300+
template <typename Func>
301+
void VisitOverlappingReplacements(unsigned lcl, unsigned offs, unsigned size, Func func);
302+
303+
void InsertPreStatementReadBacks();
304+
void InsertPreStatementWriteBacks();
305+
GenTree** InsertMidTreeReadBacks(GenTree** use);
306+
301307
void ReadBackAfterCall(GenTreeCall* call, GenTree* user);
302308
bool IsPromotedStructLocalDying(GenTreeLclVarCommon* structLcl);
303309
void ReplaceLocal(GenTree** use, GenTree* user);
304310
void CheckForwardSubForLastUse(unsigned lclNum);
305-
void WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size);
311+
void WriteBackBeforeCurrentStatement(unsigned lcl, unsigned offs, unsigned size);
312+
void WriteBackBeforeUse(GenTree** use, unsigned lcl, unsigned offs, unsigned size);
306313
void MarkForReadBack(GenTreeLclVarCommon* lcl, unsigned size DEBUGARG(const char* reason));
307314

308315
void HandleStructStore(GenTree** use, GenTree* user);

src/coreclr/jit/promotiondecomposition.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1185,7 +1185,7 @@ void ReplaceVisitor::HandleStructStore(GenTree** use, GenTree* user)
11851185
{
11861186
GenTreeLclVarCommon* srcLcl = store->Data()->AsLclVarCommon();
11871187
unsigned size = srcLcl->GetLayout(m_compiler)->GetSize();
1188-
WriteBackBefore(&store->Data(), srcLcl->GetLclNum(), srcLcl->GetLclOffs(), size);
1188+
WriteBackBeforeUse(&store->Data(), srcLcl->GetLclNum(), srcLcl->GetLclOffs(), size);
11891189
}
11901190

11911191
if (store->OperIsLocalStore())

0 commit comments

Comments
 (0)