Skip to content

Commit 51bb6c3

Browse files
committed
Fix a bug in lowerxarch related to merging Sse41.Insert chains
1 parent b85156c commit 51bb6c3

File tree

1 file changed

+20
-3
lines changed

1 file changed

+20
-3
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2809,6 +2809,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
28092809

28102810
if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
28112811
{
2812+
assert(argCnt <= 4);
2813+
GenTree* insertedNodes[4];
2814+
28122815
for (N = 1; N < argCnt - 1; N++)
28132816
{
28142817
// We will be constructing the following parts:
@@ -2837,10 +2840,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
28372840
idx = comp->gtNewIconNode(N << 4, TYP_INT);
28382841
BlockRange().InsertAfter(tmp2, idx);
28392842

2840-
tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType,
2843+
tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType,
28412844
simdSize);
2842-
BlockRange().InsertAfter(idx, tmp1);
2843-
LowerNode(tmp1);
2845+
BlockRange().InsertAfter(idx, tmp3);
2846+
2847+
insertedNodes[N] = tmp3;
2848+
tmp1 = tmp3;
28442849
}
28452850

28462851
// We will be constructing the following parts:
@@ -2868,6 +2873,18 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
28682873
BlockRange().InsertAfter(tmp2, idx);
28692874

28702875
node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, tmp2, idx);
2876+
2877+
for (N = 1; N < argCnt - 1; N++)
2878+
{
2879+
// LowerNode for NI_SSE41_Insert specially handles zeros, constants, and certain mask values
2880+
// to do the minimal number of operations and may merge together two neighboring inserts that
2881+
// don't have any side effects between them. Because of this and because of the interdependence
2882+
// of the inserts we've created above, we need to wait to lower the generated inserts until after
2883+
// we've completed the chain.
2884+
2885+
GenTree* insertedNode = insertedNodes[N];
2886+
LowerNode(insertedNode);
2887+
}
28712888
break;
28722889
}
28732890

0 commit comments

Comments
 (0)