Skip to content

Commit 7889f83

Browse files
committed
[WIP][DAG] getNode - freeze insert/compress node base values if the inserted/mask values are UNDEF
Ensure these folds don't expose any poison elements of the base vector: when the fold returns the base value directly, freeze it first.
1 parent ace5108 commit 7889f83

File tree

188 files changed

+37494
-29980
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

188 files changed

+37494
-29980
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27552,7 +27552,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2755227552

2755327553
// If inserting an UNDEF, just return the original vector.
2755427554
if (N1.isUndef())
27555-
return N0;
27555+
return DAG.getFreeze(N0);
2755627556

2755727557
// If this is an insert of an extracted vector into an undef vector, we can
2755827558
// just use the input to the extract if the types match, and can simplify

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7962,7 +7962,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
79627962

79637963
// If the inserted element is an UNDEF, just use the input vector.
79647964
if (N2.isUndef())
7965-
return N1;
7965+
return getFreeze(N1);
79667966

79677967
break;
79687968
}
@@ -8001,7 +8001,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
80018001
// can just use the input to the extract.
80028002
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
80038003
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
8004-
return N2.getOperand(0);
8004+
return getFreeze(N2.getOperand(0));
80058005
break;
80068006
}
80078007
case ISD::BITCAST:
@@ -8028,7 +8028,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
80288028
"Vector and mask must have same number of elements.");
80298029

80308030
if (N1.isUndef() || N2.isUndef())
8031-
return N3;
8031+
return getFreeze(N3);
80328032

80338033
break;
80348034
}

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45044,7 +45044,9 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4504445044

4504545045
switch (Op.getOpcode()) {
4504645046
case X86ISD::PSHUFD:
45047+
case X86ISD::BLENDI:
4504745048
case X86ISD::VPERMILPI:
45049+
case X86ISD::VPERMV:
4504845050
case X86ISD::VPERMV3: {
4504945051
SmallVector<int, 8> Mask;
4505045052
SmallVector<SDValue, 2> Ops;
@@ -45087,8 +45089,12 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
4508745089
case X86ISD::VSRLI:
4508845090
case X86ISD::VSRAI:
4508945091
return false;
45092+
case X86ISD::PACKSS:
45093+
case X86ISD::PACKUS:
4509045094
case X86ISD::PSHUFD:
45095+
case X86ISD::BLENDI:
4509145096
case X86ISD::VPERMILPI:
45097+
case X86ISD::VPERMV:
4509245098
case X86ISD::VPERMV3:
4509345099
case X86ISD::UNPCKH:
4509445100
case X86ISD::UNPCKL:

llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
132132
define <vscale x 2 x i64> @dup_extract_nxv2i64_v1i64(<1 x i64> %data) {
133133
; CHECK-LABEL: dup_extract_nxv2i64_v1i64:
134134
; CHECK: // %bb.0:
135-
; CHECK-NEXT: mov z0.d, x8
135+
; CHECK-NEXT: mov z0.d, d0
136136
; CHECK-NEXT: ret
137137
%1 = extractelement <1 x i64> %data, i64 1
138138
%.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,10 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
304304
;
305305
; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
306306
; CHECK-STREAMING: // %bb.0:
307-
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
307+
; CHECK-STREAMING-NEXT: index z0.b, #0, #1
308308
; CHECK-STREAMING-NEXT: mov z1.b, w0
309309
; CHECK-STREAMING-NEXT: ptrue p0.b, vl16
310+
; CHECK-STREAMING-NEXT: add z0.b, z1.b, z0.b
310311
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
311312
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
312313
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
@@ -331,9 +332,10 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
331332
;
332333
; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
333334
; CHECK-STREAMING: // %bb.0:
334-
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
335+
; CHECK-STREAMING-NEXT: index z0.b, #0, #1
335336
; CHECK-STREAMING-NEXT: mov z1.b, w0
336337
; CHECK-STREAMING-NEXT: ptrue p0.b, vl8
338+
; CHECK-STREAMING-NEXT: add z0.b, z1.b, z0.b
337339
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
338340
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
339341
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
@@ -362,10 +364,11 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
362364
;
363365
; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
364366
; CHECK-STREAMING: // %bb.0:
365-
; CHECK-STREAMING-NEXT: mov z1.h, w0
366367
; CHECK-STREAMING-NEXT: index z0.h, #0, #1
368+
; CHECK-STREAMING-NEXT: mov z1.h, w0
367369
; CHECK-STREAMING-NEXT: ptrue p0.h, vl4
368370
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
371+
; CHECK-STREAMING-NEXT: mov z0.h, p0/m, z0.h
369372
; CHECK-STREAMING-NEXT: add z0.h, z1.h, z0.h
370373
; CHECK-STREAMING-NEXT: mov z1.h, w1
371374
; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
@@ -394,10 +397,12 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
394397
;
395398
; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
396399
; CHECK-STREAMING: // %bb.0:
400+
; CHECK-STREAMING-NEXT: index z0.s, #0, #1
397401
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
398402
; CHECK-STREAMING-NEXT: ptrue p0.s, vl2
399-
; CHECK-STREAMING-NEXT: index z0.s, w8, #1
403+
; CHECK-STREAMING-NEXT: mov z1.s, w8
400404
; CHECK-STREAMING-NEXT: and w8, w1, #0xff
405+
; CHECK-STREAMING-NEXT: add z0.s, z1.s, z0.s
401406
; CHECK-STREAMING-NEXT: mov z1.s, w8
402407
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
403408
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s

llvm/test/CodeGen/AArch64/arm64-build-vector.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define void @widen_f16_build_vector(ptr %addr) {
5757
; CHECK-LABEL: widen_f16_build_vector:
5858
; CHECK: // %bb.0:
5959
; CHECK-NEXT: mov w8, #13294 // =0x33ee
60-
; CHECK-NEXT: movk w8, #13294, lsl #16
61-
; CHECK-NEXT: str w8, [x0]
60+
; CHECK-NEXT: dup v0.4h, w8
61+
; CHECK-NEXT: str s0, [x0]
6262
; CHECK-NEXT: ret
6363
store <2 x half> <half 0xH33EE, half 0xH33EE>, ptr %addr, align 2
6464
ret void

llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,14 @@ define i32 @combine_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i3
9393
define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
9494
; CHECK-LABEL: combine_undef_add_8xi32:
9595
; CHECK: // %bb.0:
96-
; CHECK-NEXT: fmov s1, w0
97-
; CHECK-NEXT: movi v0.2d, #0000000000000000
98-
; CHECK-NEXT: mov v1.s[1], w1
99-
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
100-
; CHECK-NEXT: mov v1.s[2], w2
101-
; CHECK-NEXT: mov v1.s[3], w3
102-
; CHECK-NEXT: xtn v2.4h, v1.4s
103-
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
104-
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
105-
; CHECK-NEXT: mov v1.d[1], v0.d[0]
106-
; CHECK-NEXT: uaddlv s0, v1.8h
96+
; CHECK-NEXT: fmov s0, w0
97+
; CHECK-NEXT: mov v0.s[1], w1
98+
; CHECK-NEXT: mov v0.s[2], w2
99+
; CHECK-NEXT: mov v0.s[3], w3
100+
; CHECK-NEXT: uzp2 v1.8h, v0.8h, v0.8h
101+
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
102+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
103+
; CHECK-NEXT: uaddlv s0, v0.8h
107104
; CHECK-NEXT: fmov w0, s0
108105
; CHECK-NEXT: ret
109106
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0

llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,9 @@ define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
374374
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
375375
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
376376
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
377-
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
377+
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
378+
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
379+
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
378380
; VBITS_GE_512-NEXT: ret
379381
%op1 = load <16 x float>, ptr %a
380382
%res = fptoui <16 x float> %op1 to <16 x i16>
@@ -388,7 +390,9 @@ define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
388390
; CHECK-NEXT: ptrue p0.s, vl32
389391
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
390392
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
391-
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
393+
; CHECK-NEXT: ptrue p0.h, vl32
394+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
395+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
392396
; CHECK-NEXT: ret
393397
%op1 = load <32 x float>, ptr %a
394398
%res = fptoui <32 x float> %op1 to <32 x i16>
@@ -402,7 +406,9 @@ define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
402406
; CHECK-NEXT: ptrue p0.s, vl64
403407
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
404408
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
405-
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
409+
; CHECK-NEXT: ptrue p0.h, vl64
410+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
411+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
406412
; CHECK-NEXT: ret
407413
%op1 = load <64 x float>, ptr %a
408414
%res = fptoui <64 x float> %op1 to <64 x i16>
@@ -684,7 +690,10 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
684690
; CHECK-NEXT: ptrue p0.d, vl16
685691
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
686692
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
687-
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
693+
; CHECK-NEXT: ptrue p0.h, vl16
694+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
695+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
696+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
688697
; CHECK-NEXT: ret
689698
%op1 = load <16 x double>, ptr %a
690699
%res = fptoui <16 x double> %op1 to <16 x i16>
@@ -698,7 +707,10 @@ define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
698707
; CHECK-NEXT: ptrue p0.d, vl32
699708
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
700709
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
701-
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
710+
; CHECK-NEXT: ptrue p0.h, vl32
711+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
712+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
713+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
702714
; CHECK-NEXT: ret
703715
%op1 = load <32 x double>, ptr %a
704716
%res = fptoui <32 x double> %op1 to <32 x i16>
@@ -769,7 +781,9 @@ define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
769781
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
770782
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
771783
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
772-
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
784+
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
785+
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
786+
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
773787
; VBITS_GE_512-NEXT: ret
774788
%op1 = load <8 x double>, ptr %a
775789
%res = fptoui <8 x double> %op1 to <8 x i32>
@@ -783,7 +797,9 @@ define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
783797
; CHECK-NEXT: ptrue p0.d, vl16
784798
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
785799
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
786-
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
800+
; CHECK-NEXT: ptrue p0.s, vl16
801+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
802+
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
787803
; CHECK-NEXT: ret
788804
%op1 = load <16 x double>, ptr %a
789805
%res = fptoui <16 x double> %op1 to <16 x i32>
@@ -797,7 +813,9 @@ define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
797813
; CHECK-NEXT: ptrue p0.d, vl32
798814
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
799815
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
800-
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
816+
; CHECK-NEXT: ptrue p0.s, vl32
817+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
818+
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
801819
; CHECK-NEXT: ret
802820
%op1 = load <32 x double>, ptr %a
803821
%res = fptoui <32 x double> %op1 to <32 x i32>
@@ -1267,7 +1285,9 @@ define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
12671285
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
12681286
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
12691287
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
1270-
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
1288+
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
1289+
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
1290+
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
12711291
; VBITS_GE_512-NEXT: ret
12721292
%op1 = load <16 x float>, ptr %a
12731293
%res = fptosi <16 x float> %op1 to <16 x i16>
@@ -1281,7 +1301,9 @@ define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
12811301
; CHECK-NEXT: ptrue p0.s, vl32
12821302
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
12831303
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1284-
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
1304+
; CHECK-NEXT: ptrue p0.h, vl32
1305+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1306+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
12851307
; CHECK-NEXT: ret
12861308
%op1 = load <32 x float>, ptr %a
12871309
%res = fptosi <32 x float> %op1 to <32 x i16>
@@ -1295,7 +1317,9 @@ define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
12951317
; CHECK-NEXT: ptrue p0.s, vl64
12961318
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
12971319
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1298-
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
1320+
; CHECK-NEXT: ptrue p0.h, vl64
1321+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1322+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
12991323
; CHECK-NEXT: ret
13001324
%op1 = load <64 x float>, ptr %a
13011325
%res = fptosi <64 x float> %op1 to <64 x i16>
@@ -1577,7 +1601,10 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
15771601
; CHECK-NEXT: ptrue p0.d, vl16
15781602
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
15791603
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1580-
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
1604+
; CHECK-NEXT: ptrue p0.h, vl16
1605+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1606+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1607+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
15811608
; CHECK-NEXT: ret
15821609
%op1 = load <16 x double>, ptr %a
15831610
%res = fptosi <16 x double> %op1 to <16 x i16>
@@ -1591,7 +1618,10 @@ define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
15911618
; CHECK-NEXT: ptrue p0.d, vl32
15921619
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
15931620
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1594-
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
1621+
; CHECK-NEXT: ptrue p0.h, vl32
1622+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1623+
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1624+
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
15951625
; CHECK-NEXT: ret
15961626
%op1 = load <32 x double>, ptr %a
15971627
%res = fptosi <32 x double> %op1 to <32 x i16>
@@ -1662,7 +1692,9 @@ define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
16621692
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
16631693
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
16641694
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
1665-
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
1695+
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
1696+
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
1697+
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
16661698
; VBITS_GE_512-NEXT: ret
16671699
%op1 = load <8 x double>, ptr %a
16681700
%res = fptosi <8 x double> %op1 to <8 x i32>
@@ -1676,7 +1708,9 @@ define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
16761708
; CHECK-NEXT: ptrue p0.d, vl16
16771709
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
16781710
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1679-
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
1711+
; CHECK-NEXT: ptrue p0.s, vl16
1712+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1713+
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
16801714
; CHECK-NEXT: ret
16811715
%op1 = load <16 x double>, ptr %a
16821716
%res = fptosi <16 x double> %op1 to <16 x i32>
@@ -1690,7 +1724,9 @@ define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
16901724
; CHECK-NEXT: ptrue p0.d, vl32
16911725
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
16921726
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1693-
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
1727+
; CHECK-NEXT: ptrue p0.s, vl32
1728+
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1729+
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
16941730
; CHECK-NEXT: ret
16951731
%op1 = load <32 x double>, ptr %a
16961732
%res = fptosi <32 x double> %op1 to <32 x i32>

0 commit comments

Comments
 (0)