@@ -9100,6 +9100,7 @@ class OptBoolsDsc
9100
9100
9101
9101
public:
9102
9102
bool optOptimizeBoolsCondBlock ();
9103
+ bool optOptimizeCompareChainCondBlock ();
9103
9104
bool optOptimizeBoolsReturnBlock (BasicBlock* b3);
9104
9105
#ifdef DEBUG
9105
9106
void optOptimizeBoolsGcStress ();
@@ -9110,6 +9111,7 @@ class OptBoolsDsc
9110
9111
GenTree* optIsBoolComp (OptTestInfo* pOptTest);
9111
9112
bool optOptimizeBoolsChkTypeCostCond ();
9112
9113
void optOptimizeBoolsUpdateTrees ();
9114
+ bool FindCompareChain (GenTree* condition, bool * isTestCondition);
9113
9115
};
9114
9116
9115
9117
// -----------------------------------------------------------------------------
@@ -9335,6 +9337,267 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock()
9335
9337
return true ;
9336
9338
}
9337
9339
9340
+ // -----------------------------------------------------------------------------
9341
+ // FindCompareChain: Check if the given condition is a compare chain.
9342
+ //
9343
+ // Arguments:
9344
+ // condition: Condition to check.
9345
+ // isTestCondition: Returns true if condition is a test condition (EQ/NE against zero, or a single-bit test) but is not a compare chain.
9346
+ //
9347
+ // Returns:
9348
+ // true if the condition is a compare chain.
9349
+ //
9350
+ // Assumptions:
9351
+ // m_b1 and m_b2 are set on entry.
9352
+ //
9353
+
9354
+ bool OptBoolsDsc::FindCompareChain (GenTree* condition, bool * isTestCondition)
9355
+ {
9356
+ GenTree* condOp1 = condition->gtGetOp1 ();
9357
+ GenTree* condOp2 = condition->gtGetOp2 ();
9358
+ // Default the out-parameter; it is only set to true on the test-condition paths below.
9359
+ *isTestCondition = false ;
9360
+ // Only EQ/NE against an integral constant is classified; anything else falls through to "return false".
9361
+ if (condition->OperIs (GT_EQ, GT_NE) && condOp2->IsIntegralConst ())
9362
+ {
9363
+ ssize_t condOp2Value = condOp2->AsIntCon ()->IconValue ();
9364
+
9365
+ if (condOp2Value == 0 )
9366
+ {
9367
+ // Found an EQ/NE(...,0). Does it contain a compare chain (ie - conditions that have
9368
+ // previously been combined by optOptimizeCompareChainCondBlock) or is it a test condition
9369
+ // that will be optimised to cbz/cbnz during lowering?
9370
+
9371
+ if (condOp1->OperIs (GT_AND, GT_OR))
9372
+ {
9373
+ // Check that the second operand of AND/OR ends with a compare operation, as this will be
9374
+ // the condition the new link in the chain will connect with. That compare must also have
9375
+ // an integral first operand.
9375
+ if (condOp1->gtGetOp2 ()->OperIsCmpCompare () && varTypeIsIntegralOrI (condOp1->gtGetOp2 ()->gtGetOp1 ()))
9376
+ {
9377
+ return true ; // Compare chain found; *isTestCondition stays false.
9378
+ }
9379
+ }
9380
+ // EQ/NE(...,0) that is not a compare chain: report it as a test condition.
9381
+ *isTestCondition = true ;
9382
+ }
9383
+ else if (condOp1->OperIs (GT_AND) && isPow2 (static_cast <target_size_t >(condOp2Value)) &&
9384
+ condOp1->gtGetOp2 ()->IsIntegralConst (condOp2Value))
9385
+ {
9386
+ // Found an EQ/NE(AND(...,n),n), with n a power of two, which will be optimized to tbz/tbnz during lowering.
9387
+ *isTestCondition = true ;
9388
+ }
9389
+ }
9390
+ // Not a compare chain; *isTestCondition tells the caller whether it was a test condition instead.
9391
+ return false ;
9392
+ }
9393
+
9394
+ // -----------------------------------------------------------------------------
9395
+ // optOptimizeCompareChainCondBlock: Create a chain when both m_b1 and m_b2 are BBJ_COND.
9396
+ //
9397
+ // Returns:
9398
+ // true if chain optimization is done and m_b1 and m_b2 are folded into m_b1, else false.
9399
+ //
9400
+ // Assumptions:
9401
+ // m_b1 and m_b2 are set on entry.
9402
+ //
9403
+ // Notes:
9404
+ //
9405
+ // This aims to reduce the number of conditional jumps by joining cases when multiple
9406
+ // conditions gate the execution of a block.
9407
+ //
9408
+ // Example 1:
9409
+ // If ( a > b || c == d) { x = y; }
9410
+ //
9411
+ // Will be represented in IR as:
9412
+ //
9413
+ // ------------ BB01 -> BB03 (cond), succs={BB02,BB03}
9414
+ // * JTRUE (GT a,b)
9415
+ //
9416
+ // ------------ BB02 -> BB04 (cond), preds={BB01} succs={BB03,BB04}
9417
+ // * JTRUE (NE c,d)
9418
+ //
9419
+ // ------------ BB03, preds={BB01, BB02} succs={BB04}
9420
+ // * ASG (x,y)
9421
+ //
9422
+ // These operands will be combined into a single AND in the first block (with the first
9423
+ // condition inverted), wrapped by the test condition (NE(...,0)). Giving:
9424
+ //
9425
+ // ------------ BB01 -> BB03 (cond), succs={BB03,BB04}
9426
+ // * JTRUE (NE (AND (LE a,b), (NE c,d)), 0)
9427
+ //
9428
+ // ------------ BB03, preds={BB01} succs={BB04}
9429
+ // * ASG x,y
9430
+ //
9431
+ //
9432
+ // Example 2:
9433
+ // If ( a > b && c == d) { x = y; } else { x = z; }
9434
+ //
9435
+ // Here the && conditions are connected via an OR. After the pass:
9436
+ //
9437
+ // ------------ BB01 -> BB03 (cond), succs={BB03,BB04}
9438
+ // * JTRUE (NE (OR (LE a,b), (NE c,d)), 0)
9439
+ //
9440
+ // ------------ BB03, preds={BB01} succs={BB05}
9441
+ // * ASG x,y
9442
+ //
9443
+ // ------------ BB04, preds={BB01} succs={BB05}
9444
+ // * ASG x,z
9445
+ //
9446
+ //
9447
+ // Example 3:
9448
+ // If ( a > b || c == d || e < f ) { x = y; }
9449
+ // The first pass of the optimization will combine two of the conditions. The
9450
+ // second pass will then combine the remaining condition with the earlier chain.
9451
+ //
9452
+ // ------------ BB01 -> BB03 (cond), succs={BB03,BB04}
9453
+ // * JTRUE (NE (OR ((NE (OR (NE c,d), (GE e,f)), 0), (LE a,b))), 0)
9454
+ //
9455
+ // ------------ BB03, preds={BB01} succs={BB04}
9456
+ // * ASG x,y
9457
+ //
9458
+ //
9459
+ // This optimization means that every condition within the IF statement is always evaluated,
9460
+ // as opposed to stopping at the first positive match.
9461
+ // Theoretically there is no maximum limit on the size of the generated chain. Therefore cost
9462
+ // checking is used to limit the maximum number of conditions that can be chained together.
9463
+ //
9464
+ bool OptBoolsDsc::optOptimizeCompareChainCondBlock ()
9465
+ {
9466
+ assert ((m_b1 != nullptr ) && (m_b2 != nullptr ) && (m_b3 == nullptr ));
9467
+ m_t3 = nullptr ;
9468
+ // Classify the flow shape formed by m_b1 and m_b2; any shape other than the two below is rejected.
9469
+ bool foundEndOfOrConditions = false ;
9470
+ if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbNext ))
9471
+ {
9472
+ // Found the end of two (or more) conditions being ORed together.
9473
+ // The final condition has been inverted.
9474
+ foundEndOfOrConditions = true ;
9475
+ }
9476
+ else if ((m_b1->bbNext == m_b2) && (m_b1->bbJumpDest == m_b2->bbJumpDest ))
9477
+ {
9478
+ // Found two conditions connected together: both blocks branch to the same target.
9479
+ }
9480
+ else
9481
+ {
9482
+ return false ;
9483
+ }
9484
+
9485
+ Statement* const s1 = optOptimizeBoolsChkBlkCond ();
9486
+ if (s1 == nullptr )
9487
+ {
9488
+ return false ;
9489
+ }
9490
+ Statement* s2 = m_b2->firstStmt (); // NOTE(review): presumably the statement holding m_b2's JTRUE - confirm against optOptimizeBoolsChkBlkCond.
9491
+
9492
+ assert (m_testInfo1.testTree ->OperIs (GT_JTRUE));
9493
+ GenTree* cond1 = m_testInfo1.testTree ->gtGetOp1 ();
9494
+ assert (m_testInfo2.testTree ->OperIs (GT_JTRUE));
9495
+ GenTree* cond2 = m_testInfo2.testTree ->gtGetOp1 ();
9496
+
9497
+ // Ensure both conditions are suitable: each JTRUE operand must be a compare node.
9498
+ if (!cond1->OperIsCompare () || !cond2->OperIsCompare ())
9499
+ {
9500
+ return false ;
9501
+ }
9502
+
9503
+ // Ensure there are no additional side effects, since after chaining both conditions are always evaluated.
9504
+ if ((cond1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 ||
9505
+ (cond2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 )
9506
+ {
9507
+ return false ;
9508
+ }
9509
+
9510
+ // Integer compares only for now (until support for Arm64 fccmp instruction is added)
9511
+ if (varTypeIsFloating (cond1->gtGetOp1 ()) || varTypeIsFloating (cond2->gtGetOp1 ()))
9512
+ {
9513
+ return false ;
9514
+ }
9515
+
9516
+ // Check for previously optimized compare chains, and for test conditions that should be left alone.
9517
+ bool op1IsTestCond;
9518
+ bool op2IsTestCond;
9519
+ bool op1IsCondChain = FindCompareChain (cond1, &op1IsTestCond);
9520
+ bool op2IsCondChain = FindCompareChain (cond2, &op2IsTestCond);
9521
+
9522
+ // Avoid cases where optimizations in lowering will produce better code than optimizing here.
9523
+ if (op1IsTestCond || op2IsTestCond)
9524
+ {
9525
+ return false ;
9526
+ }
9527
+
9528
+ // Combining conditions means that all conditions are always fully evaluated.
9529
+ // Put a limit on the max size that can be combined. The limit is skipped when the
9529
+ // STRESS_OPT_BOOLS_COMPARE_CHAIN_COST stress mode is active for this compile.
9530
+ if (!m_comp->compStressCompile (Compiler::STRESS_OPT_BOOLS_COMPARE_CHAIN_COST, 25 ))
9531
+ {
9532
+ int op1Cost = cond1->GetCostEx ();
9533
+ int op2Cost = cond2->GetCostEx ();
9534
+ // The cost of combining three simple conditions is 32.
9535
+ int maxOp1Cost = op1IsCondChain ? 31 : 7 ;
9536
+ int maxOp2Cost = op2IsCondChain ? 31 : 7 ;
9537
+
9538
+ // Cost to allow for chain size of three: an existing chain may cost up to 31, a plain condition up to 7.
9539
+ if (op1Cost > maxOp1Cost || op2Cost > maxOp2Cost)
9540
+ {
9541
+ JITDUMP (" Skipping CompareChainCond that will evaluate conditions unconditionally at costs %d,%d\n " , op1Cost,
9542
+ op2Cost);
9543
+ return false ;
9544
+ }
9545
+ }
9546
+
9547
+ // Remove the first JTRUE statement from m_b1; its condition tree (cond1) is reused in the chain below.
9548
+ constexpr bool isUnlink = true ;
9549
+ m_comp->fgRemoveStmt (m_b1, s1 DEBUGARG (isUnlink));
9550
+
9551
+ // Invert the first condition in place when ending an OR sequence, so the pair can be joined with AND.
9552
+ if (foundEndOfOrConditions)
9553
+ {
9554
+ GenTree* revCond = m_comp->gtReverseCond (cond1);
9555
+ assert (cond1 == revCond); // Ensure `gtReverseCond` did not create a new node.
9556
+ }
9557
+
9558
+ // Join the two conditions together: AND for the end of an OR sequence, OR otherwise.
9559
+ genTreeOps chainedOper = foundEndOfOrConditions ? GT_AND : GT_OR;
9560
+ GenTree* chainedConditions = m_comp->gtNewOperNode (chainedOper, TYP_INT, cond1, cond2);
9561
+ cond1->gtFlags &= ~GTF_RELOP_JMP_USED;
9562
+ cond2->gtFlags &= ~GTF_RELOP_JMP_USED;
9563
+ chainedConditions->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
9564
+
9565
+ // Add a test condition onto the front of the chain, giving NE(chain, 0).
9566
+ GenTree* testcondition =
9567
+ m_comp->gtNewOperNode (GT_NE, TYP_INT, chainedConditions, m_comp->gtNewZeroConNode (TYP_INT));
9568
+
9569
+ // Wire the chain into the second block's JTRUE, copy up its effect flags, then call
9569
+ // gtSetEvalOrder / fgSetStmtSeq on the modified tree and statement.
9570
+ m_testInfo2.testTree ->AsOp ()->gtOp1 = testcondition;
9571
+ m_testInfo2.testTree ->AsOp ()->gtFlags |= (testcondition->gtFlags & GTF_ALL_EFFECT);
9572
+ m_comp->gtSetEvalOrder (m_testInfo2.testTree );
9573
+ m_comp->fgSetStmtSeq (s2);
9574
+
9575
+ // Update the flow: m_b1 no longer branches, so drop its pred edge on the old jump target
9575
+ // and turn it into a BBJ_NONE block.
9576
+ m_comp->fgRemoveRefPred (m_b1->bbJumpDest , m_b1);
9577
+ m_b1->bbJumpKind = BBJ_NONE;
9578
+
9579
+ // Fixup flags: carry m_b1's BBF_COPY_PROPAGATE flags over to m_b2.
9580
+ m_b2->bbFlags |= (m_b1->bbFlags & BBF_COPY_PROPAGATE);
9581
+
9582
+ // Join the two blocks. This is done now to ensure that additional conditions can be chained.
9583
+ if (m_comp->fgCanCompactBlocks (m_b1, m_b2))
9584
+ {
9585
+ m_comp->fgCompactBlocks (m_b1, m_b2);
9586
+ }
9587
+
9588
+ #ifdef DEBUG
9589
+ if (m_comp->verbose )
9590
+ {
9591
+ JITDUMP (" \n Combined conditions " FMT_BB " and " FMT_BB " into %s chain :\n " , m_b1->bbNum , m_b2->bbNum ,
9592
+ GenTree::OpName (chainedOper));
9593
+ m_comp->fgDumpBlock (m_b1);
9594
+ JITDUMP (" \n " );
9595
+ }
9596
+ #endif
9597
+
9598
+ return true ;
9599
+ }
9600
+
9338
9601
// -----------------------------------------------------------------------------
9339
9602
// optOptimizeBoolsChkBlkCond: Checks block conditions if it can be boolean optimized
9340
9603
//
@@ -10076,6 +10339,7 @@ PhaseStatus Compiler::optOptimizeBools()
10076
10339
}
10077
10340
#endif
10078
10341
bool change = false ;
10342
+ bool retry = false ;
10079
10343
unsigned numCond = 0 ;
10080
10344
unsigned numReturn = 0 ;
10081
10345
unsigned numPasses = 0 ;
@@ -10086,8 +10350,10 @@ PhaseStatus Compiler::optOptimizeBools()
10086
10350
numPasses++;
10087
10351
change = false ;
10088
10352
10089
- for (BasicBlock* const b1 : Blocks () )
10353
+ for (BasicBlock* b1 = fgFirstBB; b1 != nullptr ; b1 = retry ? b1 : b1-> bbNext )
10090
10354
{
10355
+ retry = false ;
10356
+
10091
10357
// We're only interested in conditional jumps here
10092
10358
10093
10359
if (b1->bbJumpKind != BBJ_COND)
@@ -10127,6 +10393,16 @@ PhaseStatus Compiler::optOptimizeBools()
10127
10393
change = true ;
10128
10394
numCond++;
10129
10395
}
10396
+ #ifdef TARGET_ARM64
10397
+ else if (optBoolsDsc.optOptimizeCompareChainCondBlock ())
10398
+ {
10399
+ // The optimization will have merged b1 and b2. Retry the loop so that
10400
+ // b1 and b2->bbNext can be tested.
10401
+ change = true ;
10402
+ retry = true ;
10403
+ numCond++;
10404
+ }
10405
+ #endif
10130
10406
}
10131
10407
else if (b2->bbJumpKind == BBJ_RETURN)
10132
10408
{
0 commit comments