@@ -2809,6 +2809,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
2809
2809
2810
2810
if (comp->compOpportunisticallyDependsOn (InstructionSet_SSE41))
2811
2811
{
2812
+ assert (argCnt <= 4 );
2813
+ GenTree* insertedNodes[4 ];
2814
+
2812
2815
for (N = 1 ; N < argCnt - 1 ; N++)
2813
2816
{
2814
2817
// We will be constructing the following parts:
@@ -2837,10 +2840,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
2837
2840
idx = comp->gtNewIconNode (N << 4 , TYP_INT);
2838
2841
BlockRange ().InsertAfter (tmp2, idx);
2839
2842
2840
- tmp1 = comp->gtNewSimdHWIntrinsicNode (simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType,
2843
+ tmp3 = comp->gtNewSimdHWIntrinsicNode (simdType, tmp1, tmp2, idx, NI_SSE41_Insert, simdBaseJitType,
2841
2844
simdSize);
2842
- BlockRange ().InsertAfter (idx, tmp1);
2843
- LowerNode (tmp1);
2845
+ BlockRange ().InsertAfter (idx, tmp3);
2846
+
2847
+ insertedNodes[N] = tmp3;
2848
+ tmp1 = tmp3;
2844
2849
}
2845
2850
2846
2851
// We will be constructing the following parts:
@@ -2868,6 +2873,18 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
2868
2873
BlockRange ().InsertAfter (tmp2, idx);
2869
2874
2870
2875
node->ResetHWIntrinsicId (NI_SSE41_Insert, comp, tmp1, tmp2, idx);
2876
+
2877
+ for (N = 1 ; N < argCnt - 1 ; N++)
2878
+ {
2879
+ // LowerNode for NI_SSE41_Insert specially handles zeros, constants, and certain mask values
2880
+ // to do the minimal number of operations and may merge together two neighboring inserts that
2881
+ // don't have any side effects between them. Because of this and because of the interdependence
2882
+ // of the inserts we've created above, we need to wait to lower the generated inserts until after
2883
+ // we've completed the chain.
2884
+
2885
+ GenTree* insertedNode = insertedNodes[N];
2886
+ LowerNode (insertedNode);
2887
+ }
2871
2888
break ;
2872
2889
}
2873
2890
0 commit comments