Skip to content

Commit 32ea339

Browse files
Fix a bug around Vector4.Distance and Sse41.Insert lowering (#81725)
* Adding a regression test for #81585
* Fixing a bug around Sse41.Insert lowering
* Ensure that Distance/DistanceSquared are correctly imported
* Account for another case around Sse41.Insert chain folding
1 parent 41772ba commit 32ea339

File tree

8 files changed

+135
-46
lines changed

8 files changed

+135
-46
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1314,10 +1314,14 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
13141314
// the zmask from op1. We expect that op2 has already been
13151315
// lowered and therefore the containment checks have happened
13161316

1317+
// Since this is a newer operation, we need to account for
1318+
// the possibility of `op1Intrinsic` zeroing the same element
1319+
// we're setting here.
1320+
13171321
assert(op1Intrinsic->Op(2)->isContained());
13181322

13191323
ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue();
1320-
ival |= (op1Ival & 0x0F);
1324+
ival |= ((op1Ival & 0x0F) & ~(1 << count_d));
13211325
op3->AsIntConCommon()->SetIconValue(ival);
13221326

13231327
// Then we'll just carry the original non-zero input and
@@ -1335,6 +1339,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
13351339
// account, we can basically do the same thing here by merging this
13361340
// zmask into the ival from op1.
13371341

1342+
// Since this is a later op, directly merging its zmask into the ival from op1 is safe.
1343+
13381344
ssize_t op1Ival = op1Idx->AsIntConCommon()->IconValue();
13391345
ival = op1Ival | zmask;
13401346
op3->AsIntConCommon()->SetIconValue(ival);

src/coreclr/jit/simdashwintrinsic.cpp

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -946,39 +946,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
946946
/* isSimdAsHWIntrinsic */ true);
947947
}
948948

949-
case NI_Vector2_Distance:
950-
case NI_Vector3_Distance:
951-
case NI_Vector4_Distance:
952-
{
953-
op1 = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize,
954-
/* isSimdAsHWIntrinsic */ true);
955-
956-
GenTree* clonedOp1;
957-
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
958-
nullptr DEBUGARG("Clone diff for vector distance"));
959-
960-
op1 = gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
961-
/* isSimdAsHWIntrinsic */ true);
962-
963-
return new (this, GT_INTRINSIC)
964-
GenTreeIntrinsic(simdBaseType, op1, NI_System_Math_Sqrt, NO_METHOD_HANDLE);
965-
}
966-
967-
case NI_Vector2_DistanceSquared:
968-
case NI_Vector3_DistanceSquared:
969-
case NI_Vector4_DistanceSquared:
970-
{
971-
op1 = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize,
972-
/* isSimdAsHWIntrinsic */ true);
973-
974-
GenTree* clonedOp1;
975-
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
976-
nullptr DEBUGARG("Clone diff for vector distance squared"));
977-
978-
return gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
979-
/* isSimdAsHWIntrinsic */ true);
980-
}
981-
982949
case NI_VectorT128_Floor:
983950
#if defined(TARGET_XARCH)
984951
case NI_VectorT256_Floor:
@@ -1382,6 +1349,39 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
13821349
break;
13831350
}
13841351

1352+
case NI_Vector2_Distance:
1353+
case NI_Vector3_Distance:
1354+
case NI_Vector4_Distance:
1355+
{
1356+
op1 = gtNewSimdBinOpNode(GT_SUB, simdType, op1, op2, simdBaseJitType, simdSize,
1357+
/* isSimdAsHWIntrinsic */ true);
1358+
1359+
GenTree* clonedOp1;
1360+
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
1361+
nullptr DEBUGARG("Clone diff for vector distance"));
1362+
1363+
op1 = gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
1364+
/* isSimdAsHWIntrinsic */ true);
1365+
1366+
return new (this, GT_INTRINSIC)
1367+
GenTreeIntrinsic(retType, op1, NI_System_Math_Sqrt, NO_METHOD_HANDLE);
1368+
}
1369+
1370+
case NI_Vector2_DistanceSquared:
1371+
case NI_Vector3_DistanceSquared:
1372+
case NI_Vector4_DistanceSquared:
1373+
{
1374+
op1 = gtNewSimdBinOpNode(GT_SUB, simdType, op1, op2, simdBaseJitType, simdSize,
1375+
/* isSimdAsHWIntrinsic */ true);
1376+
1377+
GenTree* clonedOp1;
1378+
op1 = impCloneExpr(op1, &clonedOp1, NO_CLASS_HANDLE, CHECK_SPILL_ALL,
1379+
nullptr DEBUGARG("Clone diff for vector distance squared"));
1380+
1381+
return gtNewSimdDotProdNode(retType, op1, clonedOp1, simdBaseJitType, simdSize,
1382+
/* isSimdAsHWIntrinsic */ true);
1383+
}
1384+
13851385
case NI_Quaternion_Divide:
13861386
case NI_Vector2_Divide:
13871387
case NI_Vector2_op_Division:

0 commit comments

Comments
 (0)