Skip to content

Commit 430d6a4

Browse files
authored
JIT: Support containing compares in GT_SELECT for xarch (i.e. start emitting cmov instructions) (#81267)
This adds support for contained compares in GT_SELECT nodes for xarch. As part of this, also enables if-conversion on xarch.
1 parent ca3936b commit 430d6a4

File tree

12 files changed

+642
-144
lines changed

12 files changed

+642
-144
lines changed

src/coreclr/jit/codegen.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1543,9 +1543,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15431543
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
15441544
#endif // TARGET_XARCH
15451545

1546-
#ifdef TARGET_ARM64
1546+
#if defined(TARGET_ARM64)
15471547
static insCflags InsCflagsForCcmp(GenCondition cond);
15481548
static insCond JumpKindToInsCond(emitJumpKind condition);
1549+
#elif defined(TARGET_XARCH)
1550+
static instruction JumpKindToCmov(emitJumpKind condition);
15491551
#endif
15501552

15511553
#ifndef TARGET_LOONGARCH64

src/coreclr/jit/codegenlinear.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1595,6 +1595,14 @@ void CodeGen::genConsumeRegs(GenTree* tree)
15951595
{
15961596
genConsumeAddress(tree);
15971597
}
1598+
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
1599+
else if (tree->OperIsCompare())
1600+
{
1601+
// Compares can be contained by SELECT/compare chains.
1602+
genConsumeRegs(tree->gtGetOp1());
1603+
genConsumeRegs(tree->gtGetOp2());
1604+
}
1605+
#endif
15981606
#ifdef TARGET_ARM64
15991607
else if (tree->OperIs(GT_BFIZ))
16001608
{
@@ -1610,10 +1618,9 @@ void CodeGen::genConsumeRegs(GenTree* tree)
16101618
assert(cast->isContained());
16111619
genConsumeAddress(cast->CastOp());
16121620
}
1613-
else if (tree->OperIsCompare() || tree->OperIs(GT_AND))
1621+
else if (tree->OperIs(GT_AND))
16141622
{
1615-
// Compares can be contained by a SELECT.
1616-
// ANDs and Cmp Compares may be contained in a chain.
1623+
// ANDs may be contained in a chain.
16171624
genConsumeRegs(tree->gtGetOp1());
16181625
genConsumeRegs(tree->gtGetOp2());
16191626
}

src/coreclr/jit/codegenxarch.cpp

Lines changed: 118 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,46 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
13141314
}
13151315
}
13161316

1317+
//------------------------------------------------------------------------
1318+
// JumpKindToCmov:
1319+
// Convert an emitJumpKind to the corresponding cmov instruction.
1320+
//
1321+
// Arguments:
1322+
// condition - the condition
1323+
//
1324+
// Returns:
1325+
// A cmov instruction.
1326+
//
1327+
instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
1328+
{
1329+
static constexpr instruction s_table[EJ_COUNT] = {
1330+
INS_none, INS_none, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae, INS_cmove, INS_cmovne, INS_cmovbe,
1331+
INS_cmova, INS_cmovs, INS_cmovns, INS_cmovp, INS_cmovnp, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg,
1332+
};
1333+
1334+
static_assert_no_msg(s_table[EJ_NONE] == INS_none);
1335+
static_assert_no_msg(s_table[EJ_jmp] == INS_none);
1336+
static_assert_no_msg(s_table[EJ_jo] == INS_cmovo);
1337+
static_assert_no_msg(s_table[EJ_jno] == INS_cmovno);
1338+
static_assert_no_msg(s_table[EJ_jb] == INS_cmovb);
1339+
static_assert_no_msg(s_table[EJ_jae] == INS_cmovae);
1340+
static_assert_no_msg(s_table[EJ_je] == INS_cmove);
1341+
static_assert_no_msg(s_table[EJ_jne] == INS_cmovne);
1342+
static_assert_no_msg(s_table[EJ_jbe] == INS_cmovbe);
1343+
static_assert_no_msg(s_table[EJ_ja] == INS_cmova);
1344+
static_assert_no_msg(s_table[EJ_js] == INS_cmovs);
1345+
static_assert_no_msg(s_table[EJ_jns] == INS_cmovns);
1346+
static_assert_no_msg(s_table[EJ_jp] == INS_cmovp);
1347+
static_assert_no_msg(s_table[EJ_jnp] == INS_cmovnp);
1348+
static_assert_no_msg(s_table[EJ_jl] == INS_cmovl);
1349+
static_assert_no_msg(s_table[EJ_jge] == INS_cmovge);
1350+
static_assert_no_msg(s_table[EJ_jle] == INS_cmovle);
1351+
static_assert_no_msg(s_table[EJ_jg] == INS_cmovg);
1352+
1353+
assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
1354+
return s_table[condition];
1355+
}
1356+
13171357
//------------------------------------------------------------------------
13181358
// genCodeForCompare: Produce code for a GT_SELECT/GT_SELECT_HI node.
13191359
//
@@ -1328,37 +1368,99 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
13281368
assert(select->OperIs(GT_SELECT));
13291369
#endif
13301370

1331-
regNumber dstReg = select->GetRegNum();
13321371
if (select->OperIs(GT_SELECT))
13331372
{
1334-
genConsumeReg(select->AsConditional()->gtCond);
1373+
genConsumeRegs(select->AsConditional()->gtCond);
13351374
}
13361375

13371376
genConsumeOperands(select);
13381377

1339-
instruction cmovKind = INS_cmovne;
1340-
GenTree* trueVal = select->gtOp1;
1341-
GenTree* falseVal = select->gtOp2;
1378+
regNumber dstReg = select->GetRegNum();
13421379

1343-
// If the 'true' operand was allocated the same register as the target
1344-
// register then flip it to the false value so we can skip a reg-reg mov.
1345-
if (trueVal->isUsedFromReg() && (trueVal->GetRegNum() == dstReg))
1380+
GenTree* trueVal = select->gtOp1;
1381+
GenTree* falseVal = select->gtOp2;
1382+
1383+
GenCondition cc = GenCondition::NE;
1384+
1385+
if (select->OperIs(GT_SELECT))
1386+
{
1387+
GenTree* cond = select->AsConditional()->gtCond;
1388+
if (cond->isContained())
1389+
{
1390+
assert(cond->OperIsCompare());
1391+
genCodeForCompare(cond->AsOp());
1392+
cc = GenCondition::FromRelop(cond);
1393+
1394+
if (cc.PreferSwap())
1395+
{
1396+
// genCodeForCompare generated the compare with swapped
1397+
// operands because this swap requires fewer branches/cmovs.
1398+
cc = GenCondition::Swap(cc);
1399+
}
1400+
}
1401+
else
1402+
{
1403+
regNumber condReg = cond->GetRegNum();
1404+
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
1405+
}
1406+
}
1407+
1408+
// The usual codegen will be
1409+
// mov targetReg, falseValue
1410+
// cmovne targetReg, trueValue
1411+
//
1412+
// However, if the 'true' operand was allocated the same register as the
1413+
// target register then prefer to generate
1414+
//
1415+
// mov targetReg, trueValue
1416+
// cmove targetReg, falseValue
1417+
//
1418+
// so the first mov is elided.
1419+
//
1420+
if (falseVal->isUsedFromReg() && (falseVal->GetRegNum() == dstReg))
13461421
{
13471422
std::swap(trueVal, falseVal);
1348-
cmovKind = INS_cmove;
1423+
cc = GenCondition::Reverse(cc);
13491424
}
13501425

1351-
if (select->OperIs(GT_SELECT))
1426+
// If there is a conflict then swap the condition anyway. LSRA should have
1427+
// ensured the other way around has no conflict.
1428+
if ((trueVal->gtGetContainedRegMask() & genRegMask(dstReg)) != 0)
13521429
{
1353-
// TODO-CQ: Support contained relops here.
1354-
assert(select->AsConditional()->gtCond->isUsedFromReg());
1430+
std::swap(trueVal, falseVal);
1431+
cc = GenCondition::Reverse(cc);
1432+
}
1433+
1434+
GenConditionDesc desc = GenConditionDesc::Get(cc);
13551435

1356-
regNumber condReg = select->AsConditional()->gtCond->GetRegNum();
1357-
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
1436+
// There may also be a conflict with the falseVal in case this is an AND
1437+
// condition. Once again, after swapping there should be no conflict as
1438+
// ensured by LSRA.
1439+
if ((desc.oper == GT_AND) && (falseVal->gtGetContainedRegMask() & genRegMask(dstReg)) != 0)
1440+
{
1441+
std::swap(trueVal, falseVal);
1442+
cc = GenCondition::Reverse(cc);
1443+
desc = GenConditionDesc::Get(cc);
13581444
}
13591445

13601446
inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal);
1361-
inst_RV_TT(cmovKind, emitTypeSize(select), dstReg, trueVal);
1447+
1448+
assert(!trueVal->isContained() || trueVal->isUsedFromMemory());
1449+
assert((trueVal->gtGetContainedRegMask() & genRegMask(dstReg)) == 0);
1450+
inst_RV_TT(JumpKindToCmov(desc.jumpKind1), emitTypeSize(select), dstReg, trueVal);
1451+
1452+
if (desc.oper == GT_AND)
1453+
{
1454+
assert(falseVal->isUsedFromReg());
1455+
assert((falseVal->gtGetContainedRegMask() & genRegMask(dstReg)) == 0);
1456+
inst_RV_TT(JumpKindToCmov(emitter::emitReverseJumpKind(desc.jumpKind2)), emitTypeSize(select), dstReg,
1457+
falseVal);
1458+
}
1459+
else if (desc.oper == GT_OR)
1460+
{
1461+
assert(trueVal->isUsedFromReg());
1462+
inst_RV_TT(JumpKindToCmov(desc.jumpKind2), emitTypeSize(select), dstReg, trueVal);
1463+
}
13621464

13631465
genProduceReg(select);
13641466
}
@@ -1776,6 +1878,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
17761878
case GT_TEST_EQ:
17771879
case GT_TEST_NE:
17781880
case GT_CMP:
1881+
genConsumeOperands(treeNode->AsOp());
17791882
genCodeForCompare(treeNode->AsOp());
17801883
break;
17811884

@@ -6490,8 +6593,6 @@ void CodeGen::genCompareFloat(GenTree* treeNode)
64906593
var_types op1Type = op1->TypeGet();
64916594
var_types op2Type = op2->TypeGet();
64926595

6493-
genConsumeOperands(tree);
6494-
64956596
assert(varTypeIsFloating(op1Type));
64966597
assert(op1Type == op2Type);
64976598

@@ -6565,8 +6666,6 @@ void CodeGen::genCompareInt(GenTree* treeNode)
65656666
emitter* emit = GetEmitter();
65666667
bool canReuseFlags = false;
65676668

6568-
genConsumeOperands(tree);
6569-
65706669
assert(!op1->isContainedIntOrIImmed());
65716670
assert(!varTypeIsFloating(op2Type));
65726671

src/coreclr/jit/gentree.cpp

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,7 @@ regMaskTP GenTree::gtGetContainedRegMask()
10641064
{
10651065
if (!isContained())
10661066
{
1067-
return gtGetRegMask();
1067+
return isUsedFromReg() ? gtGetRegMask() : RBM_NONE;
10681068
}
10691069

10701070
regMaskTP mask = 0;
@@ -18757,6 +18757,46 @@ bool GenTree::IsArrayAddr(GenTreeArrAddr** pArrAddr)
1875718757
return false;
1875818758
}
1875918759

18760+
//------------------------------------------------------------------------
18761+
// SupportsSettingZeroFlag: Returns true if this is an arithmetic operation
18762+
// whose codegen supports setting the "zero flag" as part of its operation.
18763+
//
18764+
// Return Value:
18765+
// True if so. A false return does not imply that codegen for the node will
18766+
// not trash the zero flag.
18767+
//
18768+
// Remarks:
18769+
// For example, for EQ (AND x y) 0, both xarch and arm64 can emit
18770+
// instructions that directly set the flags after the 'AND' and thus no
18771+
// comparison is needed.
18772+
//
18773+
// The backend expects any node for which the flags will be consumed to be
18774+
// marked with GTF_SET_FLAGS.
18775+
//
18776+
bool GenTree::SupportsSettingZeroFlag()
18777+
{
18778+
#if defined(TARGET_XARCH)
18779+
if (OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG))
18780+
{
18781+
return true;
18782+
}
18783+
18784+
#ifdef FEATURE_HW_INTRINSICS
18785+
if (OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(AsHWIntrinsic())))
18786+
{
18787+
return true;
18788+
}
18789+
#endif
18790+
#elif defined(TARGET_ARM64)
18791+
if (OperIs(GT_AND, GT_ADD, GT_SUB))
18792+
{
18793+
return true;
18794+
}
18795+
#endif
18796+
18797+
return false;
18798+
}
18799+
1876018800
//------------------------------------------------------------------------
1876118801
// Create: Create or retrieve a field sequence for the given field handle.
1876218802
//

src/coreclr/jit/gentree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,6 +1991,8 @@ struct GenTree
19911991

19921992
bool IsArrayAddr(GenTreeArrAddr** pArrAddr);
19931993

1994+
bool SupportsSettingZeroFlag();
1995+
19941996
// These are only used for dumping.
19951997
// The GetRegNum() is only valid in LIR, but the dumping methods are not easily
19961998
// modified to check this.

0 commit comments

Comments
 (0)