Skip to content

Commit 45c314f

Browse files
huoyaoyuankunalspathakakoeplinger
authored
Implement DivRem intrinsic for X86 (#66551)
* Add managed api for divrem * Add NI definition of DivRem * Fix DivRem to be static * Implement DivRem in clrjit * Add tests for DivRem * Adjust lsra and RMW * Use DivRem intrinsic in Math * Bring lower change from coreclr#37928 This fixes error while crossgen2 compiling Utf8Formatter.TryFormat(TimeSpan). * Fix signedness of DIV * Revert RMW change and fix reg allocation * Fix import of X64 intrinsic * Fix static in PNSE version * Fix accidential indent change * Apply format patch * op3 candidate should be different from op1 and op2 * Add guard over AsHWIntrinsic * Disable DivRem intrinsic use for Mono * Fix LSRA and invalid assertion * Set RWM information correctly, although totally unused. * Move multi-reg temp allocation to common helper * Disable DivRem intrinsic test for Mono. * Fix typo of quotient * Use impAssignMultiRegTypeToVar * Update #ifdef * Remove change in LowerBlockStore * Adjust assert and helper method * Update CheckMultiRegLclVar * fix build errors * Fix some merge conflicts * Fix some errors * Fix GCC build error * jit format * Make the size depending on the struct * Fix the formatting of summary * Remove trailing spaces * Fix the tests to adopt new model * Fix MAX_RET_REG_COUNT -> MAX_MULTIREG_COUNT * Exclude from mono run * Add RequiresProcessIsolation * Real fix for build break * Fix the test cases to adopt to the new system * Disable for llvm-fullaot * Also continue disabling for mono * Disable the test in a different group * fix merge conflict * format * fix another merge conflict error * misc changes from review * fix the replay errors * jit format * Add exclude list in mono/llvmfullaot * Review feedback and fix bug in CheckMultiRegLclVar * Pass registerCount * review feedback * Remove the extra comments * Disable tests on mono llvmfullaot runs as well * Add missing case for upper save/restore * Add missing RequiresProcessIsolation * Revert "Add missing case for upper save/restore" This reverts commit a98dbde. * Remove RequiresProcessIsolation property * Replace ref with fakelibs * Add references of fakelib to test project * Fix CompatibilitySuppressions.xml * fix test builds * Remove unneeded extern/using * Revert "Replace ref with fakelibs" This reverts commit e1b5fb3. * Revert "Revert "Replace ref with fakelibs"" This reverts commit 39ae98d. * Exclude DivRem.csproj * Handle the case to delay-free op3 for op1/op2 * Create the DivRem.RefOnly fake CoreLib as a reference assembly Make the DivRem tests reference the DivRem fake CoreLib as reference assembly; and ensure that it is not included as a ProjectReference by the toplevel HardwareIntrinsics merged test runners. The upshot is that the DivRem tests can call the extra APIs via a direct reference to CoreLib (instead of through System.Runtime), but the fake library is not copied into any test artifact directories, and the Mono AOT compiler never sees it. * Unify AddDelayFreeUses --------- Co-authored-by: Kunal Pathak <[email protected]> Co-authored-by: Alexander Köplinger <[email protected]>
1 parent 0717eda commit 45c314f

33 files changed

+897
-100
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
11
<?xml version="1.0" encoding="utf-8"?>
2+
<!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
23
<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
4+
<Suppression>
5+
<DiagnosticId>CP0001</DiagnosticId>
6+
<Target>T:Internal.Console</Target>
7+
</Suppression>
8+
<Suppression>
9+
<DiagnosticId>CP0001</DiagnosticId>
10+
<Target>T:System.Runtime.CompilerServices.ICastable</Target>
11+
</Suppression>
12+
<Suppression>
13+
<DiagnosticId>CP0002</DiagnosticId>
14+
<Target>F:System.Resources.ResourceManager.BaseNameField</Target>
15+
</Suppression>
16+
<Suppression>
17+
<DiagnosticId>CP0002</DiagnosticId>
18+
<Target>F:System.Resources.ResourceSet.Reader</Target>
19+
</Suppression>
20+
<Suppression>
21+
<DiagnosticId>CP0014</DiagnosticId>
22+
<Target>M:System.Runtime.InteropServices.Marshal.CreateWrapperOfType(System.Object,System.Type)-&gt;object?:[T:System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute]</Target>
23+
</Suppression>
324
</Suppressions>

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3925,6 +3925,7 @@ class Compiler
39253925
GenTree* addRangeCheckIfNeeded(
39263926
NamedIntrinsic intrinsic, GenTree* immOp, bool mustExpand, int immLowerBound, int immUpperBound);
39273927
GenTree* addRangeCheckForHWIntrinsic(GenTree* immOp, int immLowerBound, int immUpperBound);
3928+
39283929
#endif // FEATURE_HW_INTRINSICS
39293930
GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
39303931
CORINFO_SIG_INFO* sig,

src/coreclr/jit/gentree.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19130,6 +19130,8 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
1913019130
case NI_FMA_MultiplySubtractNegated:
1913119131
case NI_FMA_MultiplySubtractNegatedScalar:
1913219132
case NI_FMA_MultiplySubtractScalar:
19133+
case NI_X86Base_DivRem:
19134+
case NI_X86Base_X64_DivRem:
1913319135
{
1913419136
return true;
1913519137
}
@@ -24004,6 +24006,12 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
2400424006

2400524007
switch (GetHWIntrinsicId())
2400624008
{
24009+
#ifdef TARGET_XARCH
24010+
case NI_X86Base_DivRem:
24011+
return compiler->typGetBlkLayout(genTypeSize(GetSimdBaseType()) * 2);
24012+
case NI_X86Base_X64_DivRem:
24013+
return compiler->typGetBlkLayout(16);
24014+
#endif // TARGET_XARCH
2400724015
#ifdef TARGET_ARM64
2400824016
case NI_AdvSimd_Arm64_LoadPairScalarVector64:
2400924017
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:

src/coreclr/jit/gentree.h

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3674,8 +3674,8 @@ static const unsigned PACKED_GTF_SPILLED = 2;
36743674
//
36753675
inline GenTreeFlags GetMultiRegSpillFlagsByIdx(MultiRegSpillFlags flags, unsigned idx)
36763676
{
3677-
static_assert_no_msg(MAX_RET_REG_COUNT * 2 <= sizeof(unsigned char) * BITS_PER_BYTE);
3678-
assert(idx < MAX_RET_REG_COUNT);
3677+
static_assert_no_msg(MAX_MULTIREG_COUNT * 2 <= sizeof(unsigned char) * BITS_PER_BYTE);
3678+
assert(idx < MAX_MULTIREG_COUNT);
36793679

36803680
unsigned bits = flags >> (idx * 2); // It doesn't matter that we possibly leave other high bits here.
36813681
GenTreeFlags spillFlags = GTF_EMPTY;
@@ -3706,8 +3706,8 @@ inline GenTreeFlags GetMultiRegSpillFlagsByIdx(MultiRegSpillFlags flags, unsigne
37063706
//
37073707
inline MultiRegSpillFlags SetMultiRegSpillFlagsByIdx(MultiRegSpillFlags oldFlags, GenTreeFlags flagsToSet, unsigned idx)
37083708
{
3709-
static_assert_no_msg(MAX_RET_REG_COUNT * 2 <= sizeof(unsigned char) * BITS_PER_BYTE);
3710-
assert(idx < MAX_RET_REG_COUNT);
3709+
static_assert_no_msg(MAX_MULTIREG_COUNT * 2 <= sizeof(unsigned char) * BITS_PER_BYTE);
3710+
assert(idx < MAX_MULTIREG_COUNT);
37113711

37123712
MultiRegSpillFlags newFlags = oldFlags;
37133713
unsigned bits = 0;
@@ -8048,7 +8048,7 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
80488048
// State required to support copy/reload of a multi-reg call node.
80498049
// The first register is always given by GetRegNum().
80508050
//
8051-
regNumberSmall gtOtherRegs[MAX_RET_REG_COUNT - 1];
8051+
regNumberSmall gtOtherRegs[MAX_MULTIREG_COUNT - 1];
80528052
#endif
80538053

80548054
//----------------------------------------------------------
@@ -8063,7 +8063,7 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
80638063
void ClearOtherRegs()
80648064
{
80658065
#if FEATURE_MULTIREG_RET
8066-
for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
8066+
for (unsigned i = 0; i < MAX_MULTIREG_COUNT - 1; ++i)
80678067
{
80688068
gtOtherRegs[i] = REG_NA;
80698069
}
@@ -8081,7 +8081,7 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
80818081
//
80828082
regNumber GetRegNumByIdx(unsigned idx) const
80838083
{
8084-
assert(idx < MAX_RET_REG_COUNT);
8084+
assert(idx < MAX_MULTIREG_COUNT);
80858085

80868086
if (idx == 0)
80878087
{
@@ -8107,7 +8107,7 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
81078107
//
81088108
void SetRegNumByIdx(regNumber reg, unsigned idx)
81098109
{
8110-
assert(idx < MAX_RET_REG_COUNT);
8110+
assert(idx < MAX_MULTIREG_COUNT);
81118111

81128112
if (idx == 0)
81138113
{
@@ -8144,7 +8144,7 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
81448144
assert(OperGet() == from->OperGet());
81458145

81468146
#ifdef UNIX_AMD64_ABI
8147-
for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
8147+
for (unsigned i = 0; i < MAX_MULTIREG_COUNT - 1; ++i)
81488148
{
81498149
gtOtherRegs[i] = from->gtOtherRegs[i];
81508150
}
@@ -8161,7 +8161,7 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
81618161
// but for COPY or RELOAD there is only a valid register for the register positions
81628162
// that must be copied or reloaded.
81638163
//
8164-
for (unsigned i = MAX_RET_REG_COUNT; i > 1; i--)
8164+
for (unsigned i = MAX_MULTIREG_COUNT; i > 1; i--)
81658165
{
81668166
if (gtOtherRegs[i - 2] != REG_NA)
81678167
{
@@ -9157,6 +9157,7 @@ inline var_types GenTree::GetRegTypeByIndex(int regIndex) const
91579157
#endif // !defined(TARGET_64BIT)
91589158
#endif // FEATURE_MULTIREG_RET
91599159

9160+
#ifdef FEATURE_HW_INTRINSICS
91609161
if (OperIsHWIntrinsic())
91619162
{
91629163
assert(TypeGet() == TYP_STRUCT);
@@ -9173,9 +9174,10 @@ inline var_types GenTree::GetRegTypeByIndex(int regIndex) const
91739174
#elif defined(TARGET_XARCH)
91749175
// At this time, the only multi-reg HW intrinsics all return the type of their
91759176
// arguments. If this changes, we will need a way to record or determine this.
9176-
return gtGetOp1()->TypeGet();
9177+
return AsHWIntrinsic()->Op(1)->TypeGet();
91779178
#endif
91789179
}
9180+
#endif // FEATURE_HW_INTRINSICS
91799181

91809182
if (OperIsScalarLocal())
91819183
{

src/coreclr/jit/hwintrinsic.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,12 @@ struct HWIntrinsicInfo
808808
return 2;
809809
#endif
810810

811+
#ifdef TARGET_XARCH
812+
case NI_X86Base_DivRem:
813+
case NI_X86Base_X64_DivRem:
814+
return 2;
815+
#endif // TARGET_XARCH
816+
811817
default:
812818
unreached();
813819
}

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,6 +1187,43 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node)
11871187
break;
11881188
}
11891189

1190+
case NI_X86Base_DivRem:
1191+
case NI_X86Base_X64_DivRem:
1192+
{
1193+
assert(node->GetOperandCount() == 3);
1194+
1195+
// SIMD base type is from signature and can distinguish signed and unsigned
1196+
var_types targetType = node->GetSimdBaseType();
1197+
GenTree* op1 = node->Op(1);
1198+
GenTree* op2 = node->Op(2);
1199+
GenTree* op3 = node->Op(3);
1200+
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType);
1201+
1202+
regNumber op1Reg = op1->GetRegNum();
1203+
regNumber op2Reg = op2->GetRegNum();
1204+
regNumber op3Reg = op3->GetRegNum();
1205+
1206+
emitAttr attr = emitTypeSize(targetType);
1207+
emitter* emit = GetEmitter();
1208+
1209+
// op1: EAX, op2: EDX, op3: free
1210+
assert(op1Reg != REG_EDX);
1211+
assert(op2Reg != REG_EAX);
1212+
if (op3->isUsedFromReg())
1213+
{
1214+
assert(op3Reg != REG_EDX);
1215+
assert(op3Reg != REG_EAX);
1216+
}
1217+
1218+
emit->emitIns_Mov(INS_mov, attr, REG_EAX, op1Reg, /* canSkip */ true);
1219+
emit->emitIns_Mov(INS_mov, attr, REG_EDX, op2Reg, /* canSkip */ true);
1220+
1221+
// emit the DIV/IDIV instruction
1222+
emit->emitInsBinary(ins, attr, node, op3);
1223+
1224+
break;
1225+
}
1226+
11901227
default:
11911228
unreached();
11921229
break;

src/coreclr/jit/hwintrinsiclistxarch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ HARDWARE_INTRINSIC(Vector256, Xor,
238238
HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
239239
HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
240240
HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
241+
HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
241242

242243
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
243244
// ISA Function name SIMD size NumArg Instructions Category Flags
@@ -246,6 +247,7 @@ HARDWARE_INTRINSIC(X86Base, Pause,
246247
// X86Base 64-bit-only Intrinsics
247248
HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
248249
HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
250+
HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
249251

250252
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
251253
// ISA Function name SIMD size NumArg Instructions Category Flags

src/coreclr/jit/hwintrinsicxarch.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
513513
}
514514

515515
var_types simdBaseType = TYP_UNKNOWN;
516-
517516
if (simdSize != 0)
518517
{
519518
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
@@ -2366,6 +2365,28 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
23662365
break;
23672366
}
23682367

2368+
case NI_X86Base_DivRem:
2369+
case NI_X86Base_X64_DivRem:
2370+
{
2371+
assert(sig->numArgs == 3);
2372+
assert(HWIntrinsicInfo::IsMultiReg(intrinsic));
2373+
assert(retType == TYP_STRUCT);
2374+
assert(simdBaseJitType != CORINFO_TYPE_UNDEF);
2375+
2376+
op3 = impPopStack().val;
2377+
op2 = impPopStack().val;
2378+
op1 = impPopStack().val;
2379+
2380+
GenTreeHWIntrinsic* divRemIntrinsic = gtNewScalarHWIntrinsicNode(retType, op1, op2, op3, intrinsic);
2381+
2382+
// Store the type from signature into SIMD base type for convenience
2383+
divRemIntrinsic->SetSimdBaseJitType(simdBaseJitType);
2384+
2385+
retNode = impAssignMultiRegTypeToVar(divRemIntrinsic,
2386+
sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
2387+
break;
2388+
}
2389+
23692390
case NI_SSE_CompareScalarGreaterThan:
23702391
case NI_SSE_CompareScalarGreaterThanOrEqual:
23712392
case NI_SSE_CompareScalarNotGreaterThan:

src/coreclr/jit/importer.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11009,16 +11009,19 @@ GenTree* Compiler::impAssignMultiRegTypeToVar(GenTree* op,
1100911009
{
1101011010
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return"));
1101111011
impAssignTempGen(tmpNum, op, hClass, CHECK_SPILL_ALL);
11012-
GenTree* ret = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType);
11012+
11013+
LclVarDsc* varDsc = lvaGetDesc(tmpNum);
11014+
11015+
// Set "lvIsMultiRegRet" to block promotion under "!lvaEnregMultiRegVars".
11016+
varDsc->lvIsMultiRegRet = true;
11017+
11018+
GenTreeLclVar* ret = gtNewLclvNode(tmpNum, varDsc->lvType);
1101311019

1101411020
// TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns.
11015-
ret->gtFlags |= GTF_DONT_CSE;
11021+
ret->SetDoNotCSE();
1101611022

1101711023
assert(IsMultiRegReturnedType(hClass, callConv) || op->IsMultiRegNode());
1101811024

11019-
// Set "lvIsMultiRegRet" to block promotion under "!lvaEnregMultiRegVars".
11020-
lvaTable[tmpNum].lvIsMultiRegRet = true;
11021-
1102211025
return ret;
1102311026
}
1102411027

0 commit comments

Comments
 (0)