Skip to content

Commit e431c00

Browse files
authored
Arm64: Combine if conditions into compare chains (#79283)
Add a new stage optOptimizeCompareChainCondBlock in pass optOptimizeBools. This aims to reduce the number of conditional jumps by joining cases when multiple conditions gate the execution of a block. Example 1: If ( a > b || c == d) { x = y; } Will be represented in IR as: ------------ BB01 -> BB03 (cond), succs={BB02,BB03} * JTRUE (GT a,b) ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04} * JTRUE (NE c,d) ------------ BB03, preds={BB01, BB02} succs={BB04} * ASG (x,y) These operands will be combined into a single AND in the first block (with the first condition inverted), wrapped by the test condition (NE(...,0)). Giving: ------------ BB01 -> BB03 (cond), succs={BB03,BB04} * JTRUE (NE (AND (LE a,b), (NE c,d)), 0) ------------ BB03, preds={BB01} succs={BB04} * ASG x,y Example 2: If ( a > b && c == d) { x = y; } else { x = z; } Here the && conditions are connected via an OR. After the pass: ------------ BB01 -> BB03 (cond), succs={BB03,BB04} * JTRUE (NE (OR (LE a,b), (NE c,d)), 0) ------------ BB03, preds={BB01} succs={BB05} * ASG x,y ------------ BB04, preds={BB01} succs={BB05} * ASG x,z Example 3: If ( a > b || c == d || e < f ) { x = y; } The first pass of the optimization will combine two of the conditions. The second pass will then combine the remaining condition with the earlier chain. ------------ BB01 -> BB03 (cond), succs={BB03,BB04} * JTRUE (NE (OR ((NE (OR (NE c,d), (GE e,f)), 0), (LE a,b))), 0) ------------ BB03, preds={BB01} succs={BB04} * ASG x,y This optimization means that every condition within the IF statement is always evaluated, as opposed to stopping at the first positive match. Theoretically there is no maximum limit on the size of the generated chain. Therefore cost checking is used to limit the maximum number of conditions that can be chained together. Currently the cost checking limits a chain to a maximum of three simple conditions. This is the same behaviour as GCC. Note that LLVM allows chains of much longer length.
1 parent 6439980 commit e431c00

File tree

6 files changed

+474
-7
lines changed

6 files changed

+474
-7
lines changed

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9802,6 +9802,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
98029802
STRESS_MODE(MERGED_RETURNS) \
98039803
STRESS_MODE(BB_PROFILE) \
98049804
STRESS_MODE(OPT_BOOLS_GC) \
9805+
STRESS_MODE(OPT_BOOLS_COMPARE_CHAIN_COST) \
98059806
STRESS_MODE(REMORPH_TREES) \
98069807
STRESS_MODE(64RSLT_MUL) \
98079808
STRESS_MODE(DO_WHILE_LOOPS) \

src/coreclr/jit/optimizer.cpp

Lines changed: 277 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9100,6 +9100,7 @@ class OptBoolsDsc
91009100

91019101
public:
91029102
bool optOptimizeBoolsCondBlock();
9103+
bool optOptimizeCompareChainCondBlock();
91039104
bool optOptimizeBoolsReturnBlock(BasicBlock* b3);
91049105
#ifdef DEBUG
91059106
void optOptimizeBoolsGcStress();
@@ -9110,6 +9111,7 @@ class OptBoolsDsc
91109111
GenTree* optIsBoolComp(OptTestInfo* pOptTest);
91119112
bool optOptimizeBoolsChkTypeCostCond();
91129113
void optOptimizeBoolsUpdateTrees();
9114+
bool FindCompareChain(GenTree* condition, bool* isTestCondition);
91139115
};
91149116

91159117
//-----------------------------------------------------------------------------
@@ -9335,6 +9337,267 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock()
93359337
return true;
93369338
}
93379339

9340+
//-----------------------------------------------------------------------------
9341+
// FindCompareChain: Check if the given condition is a compare chain.
9342+
//
9343+
// Arguments:
9344+
// condition: Condition to check.
9345+
// isTestCondition: Returns true if condition is a test condition but is not a compare chain.
9346+
//
9347+
// Returns:
9348+
// true if condition is a compare chain.
9349+
//
9350+
// Assumptions:
9351+
// m_b1 and m_b2 are set on entry.
9352+
//
9353+
9354+
bool OptBoolsDsc::FindCompareChain(GenTree* condition, bool* isTestCondition)
9355+
{
9356+
// Classifies `condition` into one of three outcomes:
//   - returns true,  *isTestCondition == false : an existing compare chain;
//   - returns false, *isTestCondition == true  : a test condition better left for lowering;
//   - returns false, *isTestCondition == false : neither of the above.
GenTree* condOp1 = condition->gtGetOp1();
9357+
GenTree* condOp2 = condition->gtGetOp2();
9358+
9359+
// Default the out parameter; it is only set to true on the paths below that return false.
*isTestCondition = false;
9360+
9361+
// Only EQ/NE against an integral constant can be a chain or a test condition.
if (condition->OperIs(GT_EQ, GT_NE) && condOp2->IsIntegralConst())
9362+
{
9363+
ssize_t condOp2Value = condOp2->AsIntCon()->IconValue();
9364+
9365+
if (condOp2Value == 0)
9366+
{
9367+
// Found a EQ/NE(...,0). Does it contain a compare chain (ie - conditions that have
9368+
// previously been combined by optOptimizeCompareChainCondBlock) or is it a test condition
9369+
// that will be optimised to cbz/cbnz during lowering?
9370+
9371+
if (condOp1->OperIs(GT_AND, GT_OR))
9372+
{
9373+
// Check that the second operand of AND/OR ends with a compare operation, as this will be
9374+
// the condition the new link in the chain will connect with.
9375+
// The compare's own first operand must also be of an integral type.
if (condOp1->gtGetOp2()->OperIsCmpCompare() && varTypeIsIntegralOrI(condOp1->gtGetOp2()->gtGetOp1()))
9376+
{
9377+
return true;
9378+
}
9379+
}
9380+
9381+
// Any other EQ/NE(...,0) is reported as a test condition. This includes an AND/OR
// whose second operand did not qualify above.
*isTestCondition = true;
9382+
}
9383+
// Non-zero constant: only EQ/NE(AND(x, n), n) with n a power of two is of interest.
else if (condOp1->OperIs(GT_AND) && isPow2(static_cast<target_size_t>(condOp2Value)) &&
9384+
condOp1->gtGetOp2()->IsIntegralConst(condOp2Value))
9385+
{
9386+
// Found a EQ/NE(AND(...,n),n) which will be optimized to tbz/tbnz during lowering.
9387+
*isTestCondition = true;
9388+
}
9389+
}
9390+
9391+
// Not a compare chain. *isTestCondition tells the caller whether it was a test condition.
return false;
9392+
}
9393+
9394+
//-----------------------------------------------------------------------------
9395+
// optOptimizeCompareChainCondBlock: Create a chain when both m_b1 and m_b2 are BBJ_COND.
9396+
//
9397+
// Returns:
9398+
// true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false.
9399+
//
9400+
// Assumptions:
9401+
// m_b1 and m_b2 are set on entry.
9402+
//
9403+
// Notes:
9404+
//
9405+
// This aims to reduce the number of conditional jumps by joining cases when multiple
9406+
// conditions gate the execution of a block.
9407+
//
9408+
// Example 1:
9409+
// If ( a > b || c == d) { x = y; }
9410+
//
9411+
// Will be represented in IR as:
9412+
//
9413+
// ------------ BB01 -> BB03 (cond), succs={BB02,BB03}
9414+
// * JTRUE (GT a,b)
9415+
//
9416+
// ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04}
9417+
// * JTRUE (NE c,d)
9418+
//
9419+
// ------------ BB03, preds={BB01, BB02} succs={BB04}
9420+
// * ASG (x,y)
9421+
//
9422+
// These operands will be combined into a single AND in the first block (with the first
9423+
// condition inverted), wrapped by the test condition (NE(...,0)). Giving:
9424+
//
9425+
// ------------ BB01 -> BB03 (cond), succs={BB03,BB04}
9426+
// * JTRUE (NE (AND (LE a,b), (NE c,d)), 0)
9427+
//
9428+
// ------------ BB03, preds={BB01} succs={BB04}
9429+
// * ASG x,y
9430+
//
9431+
//
9432+
// Example 2:
9433+
// If ( a > b && c == d) { x = y; } else { x = z; }
9434+
//
9435+
// Here the && conditions are connected via an OR. After the pass:
9436+
//
9437+
// ------------ BB01 -> BB03 (cond), succs={BB03,BB04}
9438+
// * JTRUE (NE (OR (LE a,b), (NE c,d)), 0)
9439+
//
9440+
// ------------ BB03, preds={BB01} succs={BB05}
9441+
// * ASG x,y
9442+
//
9443+
// ------------ BB04, preds={BB01} succs={BB05}
9444+
// * ASG x,z
9445+
//
9446+
//
9447+
// Example 3:
9448+
// If ( a > b || c == d || e < f ) { x = y; }
9449+
// The first pass of the optimization will combine two of the conditions. The
9450+
// second pass will then combine the remaining condition with the earlier chain.
9451+
//
9452+
// ------------ BB01 -> BB03 (cond), succs={BB03,BB04}
9453+
// * JTRUE (NE (OR ((NE (OR (NE c,d), (GE e,f)), 0), (LE a,b))), 0)
9454+
//
9455+
// ------------ BB03, preds={BB01} succs={BB04}
9456+
// * ASG x,y
9457+
//
9458+
//
9459+
// This optimization means that every condition within the IF statement is always evaluated,
9460+
// as opposed to stopping at the first positive match.
9461+
// Theoretically there is no maximum limit on the size of the generated chain. Therefore cost
9462+
// checking is used to limit the maximum number of conditions that can be chained together.
9463+
//
9464+
bool OptBoolsDsc::optOptimizeCompareChainCondBlock()
9465+
{
9466+
// Tries to fold the JTRUE condition of m_b1 into the JTRUE of m_b2 as a single
// NE(AND/OR(cond1, cond2), 0) test. Returns false without modifying anything if any
// of the checks below reject the pair.
// Only the two-block form is handled here, so m_b3 must be unset.
assert((m_b1 != nullptr) && (m_b2 != nullptr) && (m_b3 == nullptr));
9467+
m_t3 = nullptr;
9468+
9469+
// Both accepted shapes require m_b2 to immediately follow m_b1; they differ only in
// where m_b1 jumps to.
bool foundEndOfOrConditions = false;
9470+
if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbNext))
9471+
{
9472+
// Found the end of two (or more) conditions being ORed together.
9473+
// The final condition has been inverted.
9474+
foundEndOfOrConditions = true;
9475+
}
9476+
else if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbJumpDest))
9477+
{
9478+
// Found two conditions connected together.
9479+
// (m_b1 and m_b2 share the same jump target, so no inversion is needed.)
}
9480+
else
9481+
{
9482+
return false;
9483+
}
9484+
9485+
// NOTE(review): optOptimizeBoolsChkBlkCond presumably populates m_testInfo1/m_testInfo2
// and returns m_b1's test statement (nullptr when the blocks are unsuitable) - confirm
// against its definition.
Statement* const s1 = optOptimizeBoolsChkBlkCond();
9486+
if (s1 == nullptr)
9487+
{
9488+
return false;
9489+
}
9490+
// NOTE(review): s2 is re-sequenced below after m_testInfo2.testTree is modified; this
// assumes that tree lives in m_b2's first statement - confirm.
Statement* s2 = m_b2->firstStmt();
9491+
9492+
assert(m_testInfo1.testTree->OperIs(GT_JTRUE));
9493+
GenTree* cond1 = m_testInfo1.testTree->gtGetOp1();
9494+
assert(m_testInfo2.testTree->OperIs(GT_JTRUE));
9495+
GenTree* cond2 = m_testInfo2.testTree->gtGetOp1();
9496+
9497+
// Ensure both conditions are suitable.
9498+
if (!cond1->OperIsCompare() || !cond2->OperIsCompare())
9499+
{
9500+
return false;
9501+
}
9502+
9503+
// Ensure there are no additional side effects.
9504+
// (Once chained, both conditions are always evaluated, so neither may carry one.)
if ((cond1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 ||
9505+
(cond2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0)
9506+
{
9507+
return false;
9508+
}
9509+
9510+
// Integer compares only for now (until support for Arm64 fccmp instruction is added)
9511+
if (varTypeIsFloating(cond1->gtGetOp1()) || varTypeIsFloating(cond2->gtGetOp1()))
9512+
{
9513+
return false;
9514+
}
9515+
9516+
// Check for previously optimized compare chains.
9517+
bool op1IsTestCond;
9518+
bool op2IsTestCond;
9519+
bool op1IsCondChain = FindCompareChain(cond1, &op1IsTestCond);
9520+
bool op2IsCondChain = FindCompareChain(cond2, &op2IsTestCond);
9521+
9522+
// Avoid cases where optimizations in lowering will produce better code than optimizing here.
9523+
if (op1IsTestCond || op2IsTestCond)
9524+
{
9525+
return false;
9526+
}
9527+
9528+
// Combining conditions means that all conditions are always fully evaluated.
9529+
// Put a limit on the max size that can be combined.
9530+
// The limit is skipped when compStressCompile returns true for this stress mode.
if (!m_comp->compStressCompile(Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25))
9531+
{
9532+
int op1Cost = cond1->GetCostEx();
9533+
int op2Cost = cond2->GetCostEx();
9534+
// The cost of combining three simple conditions is 32.
9535+
// An operand that is already a chain may cost up to 31; any other operand up to 7.
int maxOp1Cost = op1IsCondChain ? 31 : 7;
9536+
int maxOp2Cost = op2IsCondChain ? 31 : 7;
9537+
9538+
// Cost to allow for chain size of three.
9539+
if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost)
9540+
{
9541+
JITDUMP("Skipping CompareChainCond that will evaluate conditions unconditionally at costs %d,%d\n", op1Cost,
9542+
op2Cost);
9543+
return false;
9544+
}
9545+
}
9546+
9547+
// All checks passed: from here on the IR and flow graph are modified.
// Remove the first JTRUE statement.
9548+
constexpr bool isUnlink = true;
9549+
m_comp->fgRemoveStmt(m_b1, s1 DEBUGARG(isUnlink));
9550+
9551+
// Invert the condition.
9552+
// Only done for the "end of OR conditions" shape; cond1 is reversed in place.
if (foundEndOfOrConditions)
9553+
{
9554+
GenTree* revCond = m_comp->gtReverseCond(cond1);
9555+
assert(cond1 == revCond); // Ensure `gtReverseCond` did not create a new node.
9556+
}
9557+
9558+
// Join the two conditions together
9559+
// An inverted first condition is joined with AND; otherwise the conditions are joined with OR.
genTreeOps chainedOper = foundEndOfOrConditions ? GT_AND : GT_OR;
9560+
GenTree* chainedConditions = m_comp->gtNewOperNode(chainedOper, TYP_INT, cond1, cond2);
9561+
// Move GTF_RELOP_JMP_USED from the individual compares onto the combined node, and
// mark the combined node GTF_DONT_CSE.
cond1->gtFlags &= ~GTF_RELOP_JMP_USED;
9562+
cond2->gtFlags &= ~GTF_RELOP_JMP_USED;
9563+
chainedConditions->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
9564+
9565+
// Add a test condition onto the front of the chain
9566+
// i.e. NE(chainedConditions, 0), the form FindCompareChain recognises on a later pass.
GenTree* testcondition =
9567+
m_comp->gtNewOperNode(GT_NE, TYP_INT, chainedConditions, m_comp->gtNewZeroConNode(TYP_INT));
9568+
9569+
// Wire the chain into the second block
9570+
// The JTRUE of m_b2 now tests the combined condition; propagate effect flags and
// recompute evaluation order and statement sequencing for the modified tree.
m_testInfo2.testTree->AsOp()->gtOp1 = testcondition;
9571+
m_testInfo2.testTree->AsOp()->gtFlags |= (testcondition->gtFlags & GTF_ALL_EFFECT);
9572+
m_comp->gtSetEvalOrder(m_testInfo2.testTree);
9573+
m_comp->fgSetStmtSeq(s2);
9574+
9575+
// Update the flow.
9576+
// m_b1 no longer branches: drop its pred reference on the old jump target and mark it BBJ_NONE.
m_comp->fgRemoveRefPred(m_b1->bbJumpDest, m_b1);
9577+
m_b1->bbJumpKind = BBJ_NONE;
9578+
9579+
// Fixup flags.
9580+
m_b2->bbFlags |= (m_b1->bbFlags & BBF_COPY_PROPAGATE);
9581+
9582+
// Join the two blocks. This is done now to ensure that additional conditions can be chained.
9583+
if (m_comp->fgCanCompactBlocks(m_b1, m_b2))
9584+
{
9585+
m_comp->fgCompactBlocks(m_b1, m_b2);
9586+
}
9587+
9588+
#ifdef DEBUG
9589+
if (m_comp->verbose)
9590+
{
9591+
// NOTE(review): m_b2 may have been compacted into m_b1 just above - verify that reading
// m_b2->bbNum here is still meaningful.
JITDUMP("\nCombined conditions " FMT_BB " and " FMT_BB " into %s chain :\n", m_b1->bbNum, m_b2->bbNum,
9592+
GenTree::OpName(chainedOper));
9593+
m_comp->fgDumpBlock(m_b1);
9594+
JITDUMP("\n");
9595+
}
9596+
#endif
9597+
9598+
return true;
9599+
}
9600+
93389601
//-----------------------------------------------------------------------------
93399602
// optOptimizeBoolsChkBlkCond: Checks block conditions if it can be boolean optimized
93409603
//
@@ -10076,6 +10339,7 @@ PhaseStatus Compiler::optOptimizeBools()
1007610339
}
1007710340
#endif
1007810341
bool change = false;
10342+
bool retry = false;
1007910343
unsigned numCond = 0;
1008010344
unsigned numReturn = 0;
1008110345
unsigned numPasses = 0;
@@ -10086,8 +10350,10 @@ PhaseStatus Compiler::optOptimizeBools()
1008610350
numPasses++;
1008710351
change = false;
1008810352

10089-
for (BasicBlock* const b1 : Blocks())
10353+
for (BasicBlock* b1 = fgFirstBB; b1 != nullptr; b1 = retry ? b1 : b1->bbNext)
1009010354
{
10355+
retry = false;
10356+
1009110357
// We're only interested in conditional jumps here
1009210358

1009310359
if (b1->bbJumpKind != BBJ_COND)
@@ -10127,6 +10393,16 @@ PhaseStatus Compiler::optOptimizeBools()
1012710393
change = true;
1012810394
numCond++;
1012910395
}
10396+
#ifdef TARGET_ARM64
10397+
else if (optBoolsDsc.optOptimizeCompareChainCondBlock())
10398+
{
10399+
// The optimization will have merged b1 and b2. Retry the loop so that
10400+
// b1 and b2->bbNext can be tested.
10401+
change = true;
10402+
retry = true;
10403+
numCond++;
10404+
}
10405+
#endif
1013010406
}
1013110407
else if (b2->bbJumpKind == BBJ_RETURN)
1013210408
{

0 commit comments

Comments
 (0)