Skip to content

Commit 479ccb3

Browse files
Adding support for Vector512 bitwise operations: And, AndNot, Or, OnesComplement, and Xor (#83354)
* Adding support for Vector512 bitwise operations: And, AndNot, Or, OnesComplement, and Xor * Adding AVX512F APIs for And, AndNot, Load, Or, Store, and Xor * Fix the "throw new PlatformNotSupported" expressions for Avx512F * Fixing some test build failures * Ensure the Avx512F and related classes can lightup in import * Ensure that JitStressEvexEncoding is only checked in debug * Allow 64-bit alignment in the test data table types and fix the AVX512 enablement check
1 parent 751bdcf commit 479ccb3

33 files changed

+2663
-68
lines changed

src/coreclr/jit/compiler.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2287,7 +2287,28 @@ void Compiler::compSetProcessor()
22872287
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) &&
22882288
instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ))
22892289
{
2290-
if (!DoJitStressEvexEncoding())
2290+
// Using JitStressEVEXEncoding flag will force instructions which would
2291+
// otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding.
2292+
// This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus
2293+
// causing failure if not running on compatible hardware.
2294+
2295+
// We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't
2296+
// been set yet, as that's what we're trying to set here.
2297+
2298+
bool enableAvx512 = false;
2299+
2300+
#if defined(DEBUG)
2301+
if (JitConfig.JitForceEVEXEncoding())
2302+
{
2303+
enableAvx512 = true;
2304+
}
2305+
else if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
2306+
{
2307+
enableAvx512 = true;
2308+
}
2309+
#endif // DEBUG
2310+
2311+
if (!enableAvx512)
22912312
{
22922313
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F);
22932314
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL);

src/coreclr/jit/emitxarch.cpp

Lines changed: 96 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -287,12 +287,12 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const
287287
// Since we are not using k registers yet, this will have no impact on correctness but will affect things
288288
// once
289289
// k registers are used (as that is the point of the "break out operand type" of these instructions)
290-
// case INS_movdqa: // INS_movdqa32, INS_movdqa64.
291-
// case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_movdqu32, INS_movdqu64.
292-
// case INS_pand: // INS_pandd, INS_pandq.
293-
// case INS_pandn: // INS_pandnd, INS_pandnq.
294-
// case INS_por: // INS_pord, INS_porq.
295-
// case INS_pxor: // INS_pxord, INS_pxorq
290+
// case INS_movdqa: // INS_vmovdqa32, INS_vmovdqa64.
291+
// case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_vmovdqu32, INS_vmovdqu64.
292+
// case INS_pand: // INS_vpandd, INS_vpandq.
293+
// case INS_pandn: // INS_vpandnd, INS_vpandnq.
294+
// case INS_por: // INS_vpord, INS_vporq.
295+
// case INS_pxor: // INS_vpxord, INS_vpxorq
296296
// case INS_vextractf128: // INS_vextractf32x4, INS_vextractf64x2.
297297
// case INS_vextracti128: // INS_vextracti32x4, INS_vextracti64x2.
298298
// case INS_vinsertf128: // INS_vinsertf32x4, INS_vinsertf64x2.
@@ -492,6 +492,72 @@ bool emitter::IsFlagsAlwaysModified(instrDesc* id)
492492
return true;
493493
}
494494

495+
//------------------------------------------------------------------------
496+
// IsRexW0Instruction: check if the instruction always encodes REX.W as 0
497+
//
498+
// Arguments:
499+
// ins - instruction to test
500+
//
501+
// Return Value:
502+
// true if the instruction always encodes REX.W as 0; otherwise, false
503+
//
504+
bool emitter::IsRexW0Instruction(instruction ins)
505+
{
506+
insFlags flags = CodeGenInterface::instInfo[ins];
507+
508+
if ((flags & REX_W0) != 0)
509+
{
510+
assert((flags & (REX_W1 | REX_WX)) == 0);
511+
return true;
512+
}
513+
514+
return false;
515+
}
516+
517+
//------------------------------------------------------------------------
518+
// IsRexW1Instruction: check if the instruction always encodes REX.W as 1
519+
//
520+
// Arguments:
521+
// ins - instruction to test
522+
//
523+
// Return Value:
524+
// true if the instruction always encodes REX.W as 1; otherwise, false
525+
//
526+
bool emitter::IsRexW1Instruction(instruction ins)
527+
{
528+
insFlags flags = CodeGenInterface::instInfo[ins];
529+
530+
if ((flags & REX_W1) != 0)
531+
{
532+
assert((flags & (REX_W0 | REX_WX)) == 0);
533+
return true;
534+
}
535+
536+
return false;
537+
}
538+
539+
//------------------------------------------------------------------------
540+
// IsRexWXInstruction: check if the instruction requires special REX.W encoding
541+
//
542+
// Arguments:
543+
// ins - instruction to test
544+
//
545+
// Return Value:
546+
// true if the instruction requires special REX.W encoding; otherwise, false
547+
//
548+
bool emitter::IsRexWXInstruction(instruction ins)
549+
{
550+
insFlags flags = CodeGenInterface::instInfo[ins];
551+
552+
if ((flags & REX_WX) != 0)
553+
{
554+
assert((flags & (REX_W0 | REX_W1)) == 0);
555+
return true;
556+
}
557+
558+
return false;
559+
}
560+
495561
#ifdef TARGET_64BIT
496562
//------------------------------------------------------------------------
497563
// AreUpper32BitsZero: check if some previously emitted
@@ -5868,13 +5934,13 @@ bool emitter::IsMovInstruction(instruction ins)
58685934
case INS_movaps:
58695935
case INS_movd:
58705936
case INS_movdqa:
5871-
case INS_movdqa32:
5872-
case INS_movdqa64:
5937+
case INS_vmovdqa32:
5938+
case INS_vmovdqa64:
58735939
case INS_movdqu:
58745940
case INS_movdqu8:
58755941
case INS_movdqu16:
5876-
case INS_movdqu32:
5877-
case INS_movdqu64:
5942+
case INS_vmovdqu32:
5943+
case INS_vmovdqu64:
58785944
case INS_movsdsse2:
58795945
case INS_movss:
58805946
case INS_movsx:
@@ -6017,12 +6083,12 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
60176083
break;
60186084
}
60196085

6020-
case INS_movdqa32:
6021-
case INS_movdqa64:
6086+
case INS_vmovdqa32:
6087+
case INS_vmovdqa64:
60226088
case INS_movdqu8:
60236089
case INS_movdqu16:
6024-
case INS_movdqu32:
6025-
case INS_movdqu64:
6090+
case INS_vmovdqu32:
6091+
case INS_vmovdqu64:
60266092
{
60276093
// These EVEX instructions merge/mask based on k-register
60286094
// TODO-XArch-AVX512 : Handle merge/masks scenarios once k-mask support is added for these.
@@ -6233,13 +6299,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN
62336299
case INS_movapd:
62346300
case INS_movaps:
62356301
case INS_movdqa:
6236-
case INS_movdqa32:
6237-
case INS_movdqa64:
6302+
case INS_vmovdqa32:
6303+
case INS_vmovdqa64:
62386304
case INS_movdqu:
62396305
case INS_movdqu8:
62406306
case INS_movdqu16:
6241-
case INS_movdqu32:
6242-
case INS_movdqu64:
6307+
case INS_vmovdqu32:
6308+
case INS_vmovdqu64:
62436309
case INS_movsdsse2:
62446310
case INS_movss:
62456311
case INS_movupd:
@@ -17472,13 +17538,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
1747217538
break;
1747317539

1747417540
case INS_movdqa:
17475-
case INS_movdqa32:
17476-
case INS_movdqa64:
17541+
case INS_vmovdqa32:
17542+
case INS_vmovdqa64:
1747717543
case INS_movdqu:
1747817544
case INS_movdqu8:
1747917545
case INS_movdqu16:
17480-
case INS_movdqu32:
17481-
case INS_movdqu64:
17546+
case INS_vmovdqu32:
17547+
case INS_vmovdqu64:
1748217548
case INS_movaps:
1748317549
case INS_movups:
1748417550
case INS_movapd:
@@ -17691,9 +17757,17 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
1769117757
case INS_paddusw:
1769217758
case INS_psubusw:
1769317759
case INS_pand:
17760+
case INS_vpandd:
17761+
case INS_vpandq:
1769417762
case INS_pandn:
17763+
case INS_vpandnd:
17764+
case INS_vpandnq:
1769517765
case INS_por:
17766+
case INS_vpord:
17767+
case INS_vporq:
1769617768
case INS_pxor:
17769+
case INS_vpxord:
17770+
case INS_vpxorq:
1769717771
case INS_andpd:
1769817772
case INS_andps:
1769917773
case INS_andnpd:

src/coreclr/jit/emitxarch.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,21 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
202202

203203
instruction ins = id->idIns();
204204

205+
if (IsRexW0Instruction(ins))
206+
{
207+
return false;
208+
}
209+
else if (IsRexW1Instruction(ins))
210+
{
211+
return true;
212+
}
213+
214+
if (IsRexWXInstruction(ins))
215+
{
216+
// TODO: Make this a simple assert once all instructions are annotated
217+
unreached();
218+
}
219+
205220
switch (ins)
206221
{
207222
case INS_movq:
@@ -291,9 +306,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
291306
case INS_vfnmsub231sd:
292307
case INS_unpcklpd:
293308
case INS_vpermilpdvar:
294-
case INS_movdqa64:
295309
case INS_movdqu16:
296-
case INS_movdqu64:
297310
case INS_vinsertf64x4:
298311
case INS_vinserti64x4:
299312
{
@@ -409,9 +422,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
409422
case INS_vpdpbusds:
410423
case INS_vpdpwssds:
411424
case INS_vpermilpsvar:
412-
case INS_movdqa32:
413425
case INS_movdqu8:
414-
case INS_movdqu32:
415426
case INS_vinsertf32x8:
416427
case INS_vinserti32x8:
417428
{
@@ -648,6 +659,9 @@ static bool DoesWriteZeroFlag(instruction ins);
648659
bool DoesWriteSignFlag(instruction ins);
649660
bool DoesResetOverflowAndCarryFlags(instruction ins);
650661
bool IsFlagsAlwaysModified(instrDesc* id);
662+
static bool IsRexW0Instruction(instruction ins);
663+
static bool IsRexW1Instruction(instruction ins);
664+
static bool IsRexWXInstruction(instruction ins);
651665

652666
bool IsThreeOperandAVXInstruction(instruction ins)
653667
{

src/coreclr/jit/gentree.cpp

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19593,7 +19593,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
1959319593

1959419594
case GT_AND:
1959519595
{
19596-
if (simdSize == 32)
19596+
if (simdSize == 64)
19597+
{
19598+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
19599+
intrinsic = NI_AVX512F_And;
19600+
}
19601+
else if (simdSize == 32)
1959719602
{
1959819603
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
1959919604

@@ -19627,7 +19632,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
1962719632

1962819633
case GT_AND_NOT:
1962919634
{
19630-
if (simdSize == 32)
19635+
if (simdSize == 64)
19636+
{
19637+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
19638+
intrinsic = NI_AVX512F_AndNot;
19639+
}
19640+
else if (simdSize == 32)
1963119641
{
1963219642
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
1963319643

@@ -19892,7 +19902,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
1989219902

1989319903
case GT_OR:
1989419904
{
19895-
if (simdSize == 32)
19905+
if (simdSize == 64)
19906+
{
19907+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
19908+
intrinsic = NI_AVX512F_Or;
19909+
}
19910+
else if (simdSize == 32)
1989619911
{
1989719912
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
1989819913

@@ -19953,7 +19968,12 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
1995319968

1995419969
case GT_XOR:
1995519970
{
19956-
if (simdSize == 32)
19971+
if (simdSize == 64)
19972+
{
19973+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
19974+
intrinsic = NI_AVX512F_Xor;
19975+
}
19976+
else if (simdSize == 32)
1995719977
{
1995819978
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
1995919979

@@ -23446,7 +23466,15 @@ GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps op,
2344623466

2344723467
case GT_NOT:
2344823468
{
23449-
assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX));
23469+
if (simdSize == 64)
23470+
{
23471+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
23472+
}
23473+
else if (simdSize == 32)
23474+
{
23475+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
23476+
}
23477+
2345023478
op2 = gtNewAllBitsSetConNode(type);
2345123479
return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
2345223480
}

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitTyp
632632
#ifdef DEBUG
633633
CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic);
634634
#ifdef TARGET_XARCH
635-
assert((isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128));
635+
assert((isa == InstructionSet_Vector512) || (isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128));
636636
#endif // TARGET_XARCH
637637
#ifdef TARGET_ARM64
638638
assert((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128));
@@ -976,11 +976,23 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
976976

977977
assert(numArgs >= 0);
978978

979-
if (!isScalar && ((HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType) == INS_invalid) ||
980-
((simdSize != 8) && (simdSize != 16) && (simdSize != 32))))
979+
if (!isScalar)
981980
{
982-
assert(!"Unexpected HW Intrinsic");
983-
return nullptr;
981+
if (HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType) == INS_invalid)
982+
{
983+
assert(!"Unexpected HW intrinsic");
984+
return nullptr;
985+
}
986+
987+
#if defined(TARGET_ARM64)
988+
if ((simdSize != 8) && (simdSize != 16))
989+
#elif defined(TARGET_XARCH)
990+
if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64))
991+
#endif // TARGET_*
992+
{
993+
assert(!"Unexpected SIMD size");
994+
return nullptr;
995+
}
984996
}
985997

986998
GenTree* op1 = nullptr;

0 commit comments

Comments
 (0)