Skip to content

Commit 588b964

Browse files
committed
Better version
1 parent 32ebb66 commit 588b964

File tree

6 files changed

+472
-1
lines changed

6 files changed

+472
-1
lines changed

src/coreclr/jit/codegenxarch.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2573,6 +2573,9 @@ void CodeGen::genLclHeap(GenTree* tree)
25732573
target_size_t stackAdjustment = 0;
25742574
target_size_t locAllocStackOffset = 0;
25752575

2576+
// Zeroed via BLK that follows this LCLHEAP
2577+
const bool zeroedViaBlk = tree->gtFlags & GTF_LCLHEAP_ZEROED;
2578+
25762579
// compute the amount of memory to allocate to properly STACK_ALIGN.
25772580
size_t amount = 0;
25782581
if (size->IsCnsIntOrI())
@@ -2588,6 +2591,12 @@ void CodeGen::genLclHeap(GenTree* tree)
25882591
goto BAILOUT;
25892592
}
25902593

2594+
if (zeroedViaBlk)
2595+
{
2596+
// it is expected to be already STACK_ALIGN aligned
2597+
assert((amount % STACK_ALIGN) == 0);
2598+
}
2599+
25912600
// 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
25922601
amount = AlignUp(amount, STACK_ALIGN);
25932602
}
@@ -2694,6 +2703,32 @@ void CodeGen::genLclHeap(GenTree* tree)
26942703
assert((amount % STACK_ALIGN) == 0);
26952704
assert((amount % REGSIZE_BYTES) == 0);
26962705

2706+
// Whether this LCLHEAP is explicitly zeroed via BLK or not
2707+
if (zeroedViaBlk)
2708+
{
2709+
assert(compiler->info.compInitMem); // why would we zero it with !compInitMem
2710+
const bool largePage = amount >= compiler->eeGetPageSize();
2711+
assert(regCnt == REG_NA);
2712+
if (largePage || (TARGET_POINTER_SIZE == 4))
2713+
{
2714+
regCnt = tree->GetSingleTempReg();
2715+
instGen_Set_Reg_To_Imm(EA_8BYTE, regCnt, amount);
2716+
// Negate this value before calling the function to adjust the stack (which adds to ESP).
2717+
inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
2718+
genStackPointerDynamicAdjustmentWithProbe(regCnt);
2719+
// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
2720+
// we're going to assume the worst and probe.
2721+
}
2722+
else
2723+
{
2724+
// Since the size is less than a page, and the memory is zeroed by the BLK that follows, simply adjust ESP.
2725+
// ESP might already be in the guard page, so we must touch it BEFORE the alloc, not after.
2726+
lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount,
2727+
/* trackSpAdjustments */ true);
2728+
}
2729+
goto ALLOC_DONE;
2730+
}
2731+
26972732
// For small allocations we will generate up to six push 0 inline
26982733
size_t cntRegSizedWords = amount / REGSIZE_BYTES;
26992734
if (compiler->info.compInitMem && (cntRegSizedWords <= 6))

src/coreclr/jit/gentree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,8 @@ enum GenTreeFlags : unsigned int
479479
GTF_FLD_INITCLASS = 0x20000000, // GT_FIELD/GT_FIELD_ADDR -- field access requires preceding class/static init helper
480480
GTF_FLD_TGT_HEAP = 0x10000000, // GT_FIELD -- same as GTF_IND_TGT_HEAP
481481

482+
GTF_LCLHEAP_ZEROED = 0x80000000, // GT_LCLHEAP -- allocation is explicitly zeroed
483+
482484
GTF_INX_RNGCHK = 0x80000000, // GT_INDEX_ADDR -- this array address should be range-checked
483485
GTF_INX_ADDR_NONNULL = 0x40000000, // GT_INDEX_ADDR -- this array address is not null
484486

src/coreclr/jit/importer.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9894,6 +9894,34 @@ void Compiler::impImportBlockCode(BasicBlock* block)
98949894
// May throw a stack overflow exception. Obviously, we don't want locallocs to be CSE'd.
98959895
op1->gtFlags |= (GTF_EXCEPT | GTF_DONT_CSE);
98969896

9897+
#ifdef TARGET_XARCH
9898+
// Emit GT_BLK to zero const-sized LCLHEAP on XARCH
9899+
if (op2->IsIntegralConst() && info.compInitMem)
9900+
{
9901+
const ssize_t size = op2->AsIntCon()->IconValue();
9902+
if ((size > 0) && (size <= UINT_MAX))
9903+
{
9904+
// Align LCLHEAP's size so the zeroing via BLK will be more efficient
9905+
op2->AsIntCon()->gtIconVal = AlignUp((size_t)op2->AsIntCon()->IconValue(), STACK_ALIGN);
9906+
op2->gtFlags |= GTF_DONT_CSE;
9907+
9908+
// Mark as "explicitly zeroed"
9909+
op1->gtFlags |= GTF_LCLHEAP_ZEROED;
9910+
9911+
// Spill LCLHEAP to a local
9912+
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("spilling LCLHEAP"));
9913+
impAssignTempGen(tmpNum, op1);
9914+
op1 = gtNewLclvNode(tmpNum, op1->TypeGet());
9915+
9916+
GenTree* blkTree = new (this, GT_BLK)
9917+
GenTreeBlk(GT_BLK, TYP_STRUCT, op1, typGetBlkLayout((unsigned)size));
9918+
blkTree = gtNewBlkOpNode(blkTree, gtNewIconNode(0));
9919+
impAppendTree(blkTree, CHECK_SPILL_NONE, impCurStmtDI);
9920+
op1 = gtClone(op1);
9921+
}
9922+
}
9923+
#endif
9924+
98979925
// Ensure we have stack security for this method.
98989926
setNeedsGSSecurityCookie();
98999927

src/coreclr/jit/lsraxarch.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1818,7 +1818,8 @@ int LinearScan::BuildLclHeap(GenTree* tree)
18181818
// we will generate 'push 0'.
18191819
assert((sizeVal % REGSIZE_BYTES) == 0);
18201820

1821-
if (!compiler->info.compInitMem)
1821+
// explicitly zeroed LCLHEAP also needs a regCnt in case of x86 or large page
1822+
if (!compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_ZEROED))
18221823
{
18231824
#ifdef TARGET_X86
18241825
// x86 always needs regCnt.

0 commit comments

Comments
 (0)