Skip to content

Commit da0aa0c

Browse files
Cleanup some xarch emit logic (#85536)
* Ensure floating-point codegen uses the VEX aware path
* Fix `IF_RRW_RRW_CNS` to be `IF_RWR_RRD_CNS`
* Fixup emitfmtsxarch.h to have a more consistent layout
* Allow querying the scheduling info for an insFormat
* Ensure the new insFormats are handled
* Ensure we consistently use `emitInsModeFormat`
* Ensure instructions which write to a mask register are EVEX only
* Improve REX.W handling for EVEX only instructions
* Ensure that instructions use the right update mode and tuple type
* Apply formatting patch
* Ensure DstSrcSrc is still handled correctly
* Ensure BLSI/BLSR are still handled in emitOutputAM
* Use static_assert_no_msg
* Fixing the disassembly for IF_RRW_SHF
* Fixing the IF check for shld/shrd on x86
* Use the correct name: inst_RV_TT_IV
* Ensure the 4 operand insFormats include the necessary constant
* Resolve an insFormat check on x86
* Ensure other SIMD code paths are VEX aware
* Improve throughput by using a less expensive emitSizeOfInsDsc
* Apply formatting patch
* Ensure emitSizeOfInsDsc_CNS is used for RWR_RRD_*RD_CNS
* Ensure genSimd12UpperClear uses `andps` for the pre-SSE4.1 path
1 parent b02d7a1 commit da0aa0c

17 files changed

+1644
-1311
lines changed

src/coreclr/jit/codegen.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ class CodeGen final : public CodeGenInterface
4646
CORINFO_FIELD_HANDLE absBitmaskFlt;
4747
CORINFO_FIELD_HANDLE absBitmaskDbl;
4848

49+
// Bit mask used in zeroing element 3 (zero-based; the upper element past the 12 bytes of data) of a SIMD12
50+
CORINFO_FIELD_HANDLE zroSimd12Elm3;
51+
4952
// Bit mask used in U8 -> double conversion to adjust the result.
5053
CORINFO_FIELD_HANDLE u8ToDblBitmask;
5154

@@ -925,6 +928,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
925928
void genSimdUpperSave(GenTreeIntrinsic* node);
926929
void genSimdUpperRestore(GenTreeIntrinsic* node);
927930

931+
void genSimd12UpperClear(regNumber tgtReg);
932+
928933
// TYP_SIMD12 (i.e. Vector3 of size 12 bytes) is not a hardware supported size and requires
929934
// two reads/writes on 64-bit targets. These routines abstract reading/writing of Vector3
930935
// values through an indirection. Note that Vector3 locals allocated on stack would have
@@ -1532,6 +1537,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15321537
void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival);
15331538
void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival);
15341539
void inst_RV_RV_TT(instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, bool isRMW);
1540+
void inst_RV_RV_TT_IV(
1541+
instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, int8_t ival, bool isRMW);
15351542
#endif
15361543

15371544
void inst_set_SV_var(GenTree* tree);

src/coreclr/jit/codegencommon.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4474,7 +4474,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
44744474
}
44754475
#elif defined(TARGET_XARCH)
44764476
// XORPS is the fastest and smallest way to initialize an XMM register to zero.
4477-
inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
4477+
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
44784478
dblInitReg = reg;
44794479
#elif defined(TARGET_ARM64)
44804480
// We will just zero out the entire vector register. This sets it to a double/float zero value
@@ -4514,7 +4514,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
45144514
}
45154515
#elif defined(TARGET_XARCH)
45164516
// XORPS is the fastest and smallest way to initialize an XMM register to zero.
4517-
inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
4517+
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
45184518
fltInitReg = reg;
45194519
#elif defined(TARGET_ARM64)
45204520
// We will just zero out the entire vector register. This sets it to a double/float zero value

0 commit comments

Comments
 (0)